diff --git a/ee/lib/gitlab/llm/ai_gateway/client.rb b/ee/lib/gitlab/llm/ai_gateway/client.rb
index 4057cefe03a193e624bd742fd3bce26eb5868c27..2b463ee790f3a17bcb400968ab9e01f692d5241b 100644
--- a/ee/lib/gitlab/llm/ai_gateway/client.rb
+++ b/ee/lib/gitlab/llm/ai_gateway/client.rb
@@ -27,7 +27,7 @@ def complete(url:, body:, timeout: DEFAULT_TIMEOUT)
             perform_completion_request(url: url, body: body, timeout: timeout, stream: false)
           end
 
-          log_response_received(response.parsed_response)
+          log_response_received(response.parsed_response) if response&.success?
 
           response
         end
diff --git a/ee/lib/gitlab/llm/ai_gateway/completions/base.rb b/ee/lib/gitlab/llm/ai_gateway/completions/base.rb
index ba33a26784e628ad4dcfc4251fa90000d4654f1d..50dc8f519a785fd2abc118b65ee23b1d350ccba2 100644
--- a/ee/lib/gitlab/llm/ai_gateway/completions/base.rb
+++ b/ee/lib/gitlab/llm/ai_gateway/completions/base.rb
@@ -45,7 +45,13 @@ def request!
               body: { 'inputs' => inputs }
             )
 
-            Gitlab::Json.parse(response.body)
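+            # An empty body (e.g. 204 No Content) yields nil; a successful
+            # response is parsed and returned. Any other response becomes a
+            # generic error payload surfaced to the caller.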
+            return if response&.body.blank?
+            return Gitlab::Json.parse(response.body) if response&.success?
+
+            { 'detail' => DEFAULT_ERROR }
           rescue ArgumentError => e
             { 'detail' => e.message }
           rescue StandardError => e
diff --git a/ee/lib/gitlab/llm/anthropic/client.rb b/ee/lib/gitlab/llm/anthropic/client.rb
index 46c50d15354a9fdbdaeeef31662e936b6ebf4518..9e81529a276771cb3c8b094466c175bfe82b80e0 100644
--- a/ee/lib/gitlab/llm/anthropic/client.rb
+++ b/ee/lib/gitlab/llm/anthropic/client.rb
@@ -29,6 +29,12 @@ def complete(prompt:, **options)
             perform_completion_request(prompt: prompt, options: options.except(:stream))
           end
 
+          # retry_with_exponential_backoff returns nil when the server responds
+          # with a 5xx error. If the API returns a 4xx response, we still return
+          # it so the caller can parse the errors: https://docs.anthropic.com/en/api/errors
+          return unless response&.parsed_response
+          return response unless response.success?
+
           response_completion = response["completion"]
           log_response_received(response_completion, 'completion')
 
@@ -67,7 +73,13 @@ def messages_complete(messages:, **options)
             perform_messages_request(messages: messages, options: options.except(:stream))
           end
 
-          response_completion = response&.dig('content', 0, 'text')
+          # retry_with_exponential_backoff returns nil when the server responds
+          # with a 5xx error. If the API returns a 4xx response, we still return
+          # it so the caller can parse the errors: https://docs.anthropic.com/en/api/errors
+          return unless response&.parsed_response
+          return response unless response.success?
+
+          response_completion = response.dig('content', 0, 'text')
           log_response_received(response_completion, 'messages')
 
           track_prompt_size(token_size(messages))
diff --git a/ee/lib/gitlab/llm/chain/tools/embeddings_completion.rb b/ee/lib/gitlab/llm/chain/tools/embeddings_completion.rb
index 51626369f2b8e527e6ce8d78a105af7da3287c2a..bc3ee4efbb137580b3a4e887a7b04658b0200dde 100644
--- a/ee/lib/gitlab/llm/chain/tools/embeddings_completion.rb
+++ b/ee/lib/gitlab/llm/chain/tools/embeddings_completion.rb
@@ -29,9 +29,13 @@ def execute(&)
 
           def get_search_results(question)
             response = Gitlab::Llm::AiGateway::DocsClient.new(current_user)
-              .search(query: question) || {}
+              .search(query: question)
 
-            response.dig('response', 'results')&.map(&:with_indifferent_access)
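+            # Failed or unparseable responses are treated as "no documents
+            # found" so the caller can fall back to an empty answer.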
+            return {} unless response&.success? && response.parsed_response
+
+            response.parsed_response.dig('response', 'results')&.map(&:with_indifferent_access)
           end
 
           private
@@ -53,6 +55,13 @@ def get_completions_ai_gateway(search_documents)
               yield data if block_given?
             end
 
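+            # A nil or failed final prompt response cannot be streamed back to
+            # the user, so raise a ConnectionError for upstream error handling.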
+            unless final_prompt_result&.success? && final_prompt_result.parsed_response
+              final_prompt_error = "Error retrieving completions with final prompt: #{final_prompt_result}"
+              raise Gitlab::Llm::AiGateway::Client::ConnectionError, final_prompt_error
+            end
+
             log_conditional_info(current_user,
               message: "Got Final Result for documentation question content",
               event_name: 'response_received',
diff --git a/ee/lib/gitlab/llm/concerns/exponential_backoff.rb b/ee/lib/gitlab/llm/concerns/exponential_backoff.rb
index 447b4ce48dbaba55c4e5d4bbf8577f3ba007c05a..1ecf4d53a5b8891c07fd2569794c9862a25b3148 100644
--- a/ee/lib/gitlab/llm/concerns/exponential_backoff.rb
+++ b/ee/lib/gitlab/llm/concerns/exponential_backoff.rb
@@ -42,8 +42,9 @@ def run_retry_with_exponential_backoff
 
             return unless response.present?
 
-            http_response = response.response
-            return if http_response.nil? || http_response.body.blank?
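+            # Empty-body responses (e.g. 204 No Content) are now returned to
+            # the caller instead of being swallowed here.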
+            return if response.response.nil?
 
             raise Gitlab::CircuitBreaker::InternalServerError if response.server_error?
 
diff --git a/ee/lib/gitlab/llm/vertex_ai/client.rb b/ee/lib/gitlab/llm/vertex_ai/client.rb
index eeed9ce18b31539075649eee4f00185eb252e499..21c3ee549cf773a02ff7920e99121fb4696cd57a 100644
--- a/ee/lib/gitlab/llm/vertex_ai/client.rb
+++ b/ee/lib/gitlab/llm/vertex_ai/client.rb
@@ -141,14 +141,28 @@ def request(content:, config:, **options)
             ai_component: 'abstraction_layer',
             unit_primitive: unit_primitive)
 
-          if response.present?
-            track_token_usage(response)
-          else
+          # 5xx errors return as nil from the retry logic. Returning nil for a
+          # blank body preserves the previous behavior.
+          if response.nil? || response.body.blank?
             log_error(message: "Empty response from Vertex",
               event_name: 'empty_response_received',
               ai_component: 'abstraction_layer',
               unit_primitive: unit_primitive
             )
+            return
+          end
+
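+          # Unsuccessful (non-2xx) responses are still returned to the caller,
+          # but log them so unexpected statuses are visible.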
+          if response.success? && response.parsed_response.present?
+            track_token_usage(response)
+          else
+            log_error(message: "Failed response from Vertex",
+              event_name: 'failed_response_received',
+              ai_component: 'abstraction_layer',
+              unit_primitive: unit_primitive,
+              status: response.code
+            )
           end
 
           response
diff --git a/ee/spec/lib/gitlab/llm/ai_gateway/completions/base_spec.rb b/ee/spec/lib/gitlab/llm/ai_gateway/completions/base_spec.rb
index c05a386c5bed84061660cd7b1d54606b6806a734..4b96dd37a72e8c6e2ed4438249a8ca54081203a8 100644
--- a/ee/spec/lib/gitlab/llm/ai_gateway/completions/base_spec.rb
+++ b/ee/spec/lib/gitlab/llm/ai_gateway/completions/base_spec.rb
@@ -9,8 +9,8 @@
   let(:prompt_message) { build(:ai_message, ai_action: ai_action, user: user, resource: resource) }
   let(:inputs) { { prompt: "What's your name?" } }
   let(:response) { "I'm Duo" }
-  let(:http_response) { instance_double(HTTParty::Response, body: %("#{response}")) }
-  let(:processed_repsonse) { response }
+  let(:http_response) { instance_double(HTTParty::Response, body: %("#{response}"), success?: true) }
+  let(:processed_response) { response }
   let(:response_modifier_class) { Gitlab::Llm::AiGateway::ResponseModifiers::Base }
   let(:response_modifier) { instance_double(Gitlab::Llm::AiGateway::ResponseModifiers::Base) }
   let(:response_service) { instance_double(Gitlab::Llm::GraphqlSubscriptionResponseService) }
@@ -56,7 +56,7 @@
             .and_return(http_response)
         end
 
-        expect(response_modifier_class).to receive(:new).with(processed_repsonse)
+        expect(response_modifier_class).to receive(:new).with(processed_response)
           .and_return(response_modifier)
         expect(Gitlab::Llm::GraphqlSubscriptionResponseService).to receive(:new)
           .with(user, resource, response_modifier, options: response_options).and_return(response_service)
@@ -82,19 +82,19 @@
 
     context 'when the subclass raises an ArgumentError when gathering inputs' do
       let(:http_response) { nil }
-      let(:processed_repsonse) { { 'detail' => 'Something went wrong.' } }
+      let(:processed_response) { { 'detail' => 'Something went wrong.' } }
 
       before do
         subclass.define_method(:inputs) { raise ArgumentError, 'Something went wrong.' }
       end
 
       # Note: The completion "executes successfully" in that it relays the error to the user via GraphQL, which we check
-      # by changing the `let(:processed_repsonse)` in this context
+      # by changing the `let(:processed_response)` in this context
       it_behaves_like 'executing successfully'
     end
 
     context 'when an unexpected error is raised' do
-      let(:processed_repsonse) { { 'detail' => 'An unexpected error has occurred.' } }
+      let(:processed_response) { { 'detail' => 'An unexpected error has occurred.' } }
 
       before do
         allow(Gitlab::Json).to receive(:parse).and_raise(StandardError)
@@ -104,7 +104,7 @@
     end
 
     context 'when the subclass overrides the post_process method' do
-      let(:processed_repsonse) { response.upcase }
+      let(:processed_response) { response.upcase }
 
       before do
         subclass.define_method(:post_process) { |response| response.upcase }
diff --git a/ee/spec/lib/gitlab/llm/ai_gateway/completions/generate_description_spec.rb b/ee/spec/lib/gitlab/llm/ai_gateway/completions/generate_description_spec.rb
index 6b3c73b972dcf63b76ceba61f23fa5119434f261..b8ea746e688dae7ac66b8d92160bcbc6de0ddaa8 100644
--- a/ee/spec/lib/gitlab/llm/ai_gateway/completions/generate_description_spec.rb
+++ b/ee/spec/lib/gitlab/llm/ai_gateway/completions/generate_description_spec.rb
@@ -11,7 +11,7 @@
   let(:ai_options) { { content: content, description_template_name: description_template_name } }
   let(:template_class) { ::Gitlab::Llm::Templates::GenerateDescription }
   let(:ai_client) { instance_double(Gitlab::Llm::AiGateway::Client) }
-  let(:ai_response) { instance_double(HTTParty::Response, body: %("Success")) }
+  let(:ai_response) { instance_double(HTTParty::Response, body: %("Success"), success?: true) }
   let(:uuid) { SecureRandom.uuid }
   let(:prompt_message) do
     build(:ai_message, :generate_description, user: user, resource: issuable, content: content, request_id: uuid)
@@ -43,6 +43,26 @@
 
       expect(generate_description[:ai_message].content).to eq("Success")
     end
+
+    context 'with an unsuccessful request' do
+      let(:ai_response) { instance_double(HTTParty::Response, body: %("Failed"), success?: false) }
+
+      it 'returns an error' do
+        expect(Gitlab::Llm::AiGateway::Client).to receive(:new).with(
+          user,
+          service_name: :generate_description,
+          tracking_context: tracking_context
+        )
+        expect(ai_client).to receive(:complete).with(
+          url: "#{Gitlab::AiGateway.url}/v1/prompts/generate_description",
+          body: { 'inputs' => { content: content, template: expected_template } }
+        ).and_return(ai_response)
+
+        expect(::Gitlab::Llm::GraphqlSubscriptionResponseService).to receive(:new).and_call_original
+
+        expect(generate_description[:ai_message].content).to eq({ "detail" => "An unexpected error has occurred." })
+      end
+    end
   end
 
   describe "#execute" do
diff --git a/ee/spec/lib/gitlab/llm/ai_gateway/completions/summarize_review_spec.rb b/ee/spec/lib/gitlab/llm/ai_gateway/completions/summarize_review_spec.rb
index e9b6ed502c18524fe4814b57148a8fdb970ca5d6..2de5da214ea323a2fa51411aef27d796efc61bee 100644
--- a/ee/spec/lib/gitlab/llm/ai_gateway/completions/summarize_review_spec.rb
+++ b/ee/spec/lib/gitlab/llm/ai_gateway/completions/summarize_review_spec.rb
@@ -36,7 +36,7 @@
       end
 
       let(:example_answer) { { "response" => "AI generated review summary" } }
-      let(:example_response) { instance_double(HTTParty::Response, body: example_answer.to_json) }
+      let(:example_response) { instance_double(HTTParty::Response, body: example_answer.to_json, success?: true) }
 
       shared_examples_for 'summarize review' do
         it 'publishes the content from the AI response' do
diff --git a/ee/spec/lib/gitlab/llm/chain/tools/embeddings_completion_spec.rb b/ee/spec/lib/gitlab/llm/chain/tools/embeddings_completion_spec.rb
index b78f4ce089b50db0f0186616bb5c3128945e82ac..7e1564675ab08a53e95a3c017ead1ed16d36e730 100644
--- a/ee/spec/lib/gitlab/llm/chain/tools/embeddings_completion_spec.rb
+++ b/ee/spec/lib/gitlab/llm/chain/tools/embeddings_completion_spec.rb
@@ -16,12 +16,17 @@
   let(:instance) { described_class.new(current_user: user, question: question) }
   let(:ai_gateway_request) { ::Gitlab::Llm::Chain::Requests::AiGateway.new(user) }
   let(:attrs) { embeddings.pluck(:id).map { |x| "CNT-IDX-#{x}" }.join(", ") }
-  let(:completion_response) { { 'response' => "#{answer} ATTRS: #{attrs}" } }
+  let(:completion_body) { { 'response' => "#{answer} ATTRS: #{attrs}" } }
+  let(:completion_response) do
+    instance_double(HTTParty::Response, code: 200, success?: true, body: completion_body.to_json,
+      parsed_response: completion_body)
+  end
+
   let(:model) { ::Gitlab::Llm::Anthropic::Client::CLAUDE_3_5_SONNET }
 
   let(:docs_search_client) { ::Gitlab::Llm::AiGateway::DocsClient.new(user) }
   let(:docs_search_args) { { query: question } }
-  let(:docs_search_response) do
+  let(:docs_search_body) do
     {
       'response' => {
         'results' => [
@@ -35,6 +40,12 @@
     }
   end
 
+  let(:docs_search_body_json) { docs_search_body.to_json }
+  let(:docs_search_response) do
+    instance_double(HTTParty::Response, success?: true, code: 200, body: docs_search_body_json,
+      parsed_response: docs_search_body)
+  end
+
   describe '#execute' do
     subject(:execute) { instance.execute }
 
@@ -87,6 +98,18 @@
       execute
     end
 
+    it 'logs an error when the final prompt request fails' do
+      expect(ai_gateway_request).to receive(:request)
+        .with({ prompt: instance_of(Array),
+          options: { model: model, max_tokens: 256 } })
+        .once.and_return(nil)
+      expect(docs_search_client).to receive(:search).with(**docs_search_args).and_return(docs_search_response)
+
+      expect(logger).to receive(:error).with(a_hash_including(message: "Streaming error", error: anything))
+
+      execute
+    end
+
     context 'when user has AI features disabled' do
       before do
         allow(::Gitlab::Llm::TanukiBot).to receive(:enabled_for?).with(user: user).and_return(false)
@@ -106,7 +129,7 @@
     end
 
     context 'when no documents are found' do
-      let(:docs_search_response) { {} }
+      let(:docs_search_body) { {} }
 
       it 'returns an empty response message' do
         expect(execute.response_body).to eq(empty_response_message)
@@ -114,7 +137,8 @@
     end
 
     context 'when DocsClient returns nil' do
-      let(:docs_search_response) { nil }
+      let(:docs_search_body) { nil }
+      let(:docs_search_body_json) { nil }
 
       it 'returns an empty response message' do
         expect(execute.response_body).to eq(empty_response_message)
diff --git a/ee/spec/lib/gitlab/llm/concerns/exponential_backoff_spec.rb b/ee/spec/lib/gitlab/llm/concerns/exponential_backoff_spec.rb
index e4c660a9920654610d6f1e2fdf892eea004c99f5..3bb30d662ae20d33937fc775a6eb1e15d6684062 100644
--- a/ee/spec/lib/gitlab/llm/concerns/exponential_backoff_spec.rb
+++ b/ee/spec/lib/gitlab/llm/concerns/exponential_backoff_spec.rb
@@ -30,6 +30,14 @@
     instance_double(HTTParty::Response, response: nil)
   end
 
+  let(:no_content_response) do
+    instance_double(HTTParty::Response,
+      code: 204,
+      response: Net::HTTPNoContent.new("1.1", "204", "No Content"),
+      server_error?: false, too_many_requests?: false
+    )
+  end
+
   let(:server_error) do
     instance_double(HTTParty::Response,
       code: 503, success?: false, parsed_response: body,
@@ -186,6 +194,15 @@ def dummy_method(response_caller)
       end
     end
 
+    context 'when the function response is 204 No Content' do
+      it 'does not retry the function' do
+        allow(response_caller).to receive(:call).and_return(no_content_response)
+
+        expect(subject).to eq(no_content_response)
+        expect(response_caller).to have_received(:call).once
+      end
+    end
+
     context 'when the function response is a server error' do
       it 'returns a nil response' do
         allow(response_caller).to receive(:call).and_return(server_error)
diff --git a/ee/spec/lib/gitlab/llm/vertex_ai/client_spec.rb b/ee/spec/lib/gitlab/llm/vertex_ai/client_spec.rb
index 8c755d776e0cfbbcaa7a9ab35e084620ca2cfc61..8aa2317ba40fd76825097611feff1547365c5deb 100644
--- a/ee/spec/lib/gitlab/llm/vertex_ai/client_spec.rb
+++ b/ee/spec/lib/gitlab/llm/vertex_ai/client_spec.rb
@@ -136,6 +136,16 @@
       end
     end
 
+    context 'when a 403 error is returned from the API' do
+      before do
+        stub_request(:post, url).to_return(status: 403, body: "403 Forbidden")
+      end
+
+      it 'returns a 403 response' do
+        expect(response.code).to eq(403)
+      end
+    end
+
     context 'when a failed response is returned from the API' do
       include_context 'when a failed response is returned from the API'
 
diff --git a/ee/spec/support/shared_examples/lib/gitlab/llm/anthropic/client_shared_examples.rb b/ee/spec/support/shared_examples/lib/gitlab/llm/anthropic/client_shared_examples.rb
index 42036382d2dc684715eec55578b02038bd4d5da1..1ef9002852a07787bccf84281a1627b3fd9d4f40 100644
--- a/ee/spec/support/shared_examples/lib/gitlab/llm/anthropic/client_shared_examples.rb
+++ b/ee/spec/support/shared_examples/lib/gitlab/llm/anthropic/client_shared_examples.rb
@@ -93,6 +93,53 @@
         it_behaves_like 'measured Llm request with error', StandardError
       end
 
+      context 'when response is a 500 error' do
+        let(:http_status) { 500 }
+        let(:response_body) { nil }
+        let(:response_headers) { nil }
+
+        it 'returns nil' do
+          expect(complete).to be_nil
+        end
+      end
+
+      context 'when response is 204 No Content' do
+        let(:http_status) { 204 }
+        let(:response_body) { nil }
+        let(:response_headers) { nil }
+
+        it_behaves_like 'measured Llm request'
+      end
+
+      context 'when response is a bad request with no body' do
+        let(:http_status) { 400 }
+        let(:response_body) { nil }
+        let(:response_headers) { nil }
+
+        it 'returns nil' do
+          expect(complete).to be_nil
+        end
+      end
+
+      context 'when response is an authentication error' do
+        let(:http_status) { 401 }
+        let(:response_body) do
+          {
+            "type" => "error",
+            "error" => {
+              "type" => "authentication_error",
+              "message" => "There’s an issue with your API key."
+            }
+          }.to_json
+        end
+
+        it 'returns a response' do
+          expect(complete).to be_present
+          expect(complete.parsed_response['type']).to eq('error')
+          expect(complete.parsed_response['error']['type']).to eq('authentication_error')
+        end
+      end
+
       context 'when request is retried' do
         let(:http_status) { 429 }
 
@@ -425,6 +472,35 @@
         end
       end
 
+      context 'when response is a bad request with no body' do
+        let(:http_status) { 400 }
+        let(:response_body) { nil }
+        let(:response_headers) { nil }
+
+        it 'returns nil' do
+          expect(subject).to be_nil
+        end
+      end
+
+      context 'when response is an authentication error' do
+        let(:http_status) { 401 }
+        let(:response_body) do
+          {
+            "type" => "error",
+            "error" => {
+              "type" => "authentication_error",
+              "message" => "There’s an issue with your API key."
+            }
+          }.to_json
+        end
+
+        it 'returns a response' do
+          expect(subject).to be_present
+          expect(subject.parsed_response['type']).to eq('error')
+          expect(subject.parsed_response['error']['type']).to eq('authentication_error')
+        end
+      end
+
       context 'when request is retried once' do
         before do
           stub_request(:post, "#{ai_gateway_url}/v1/proxy/anthropic/v1/messages")