From 0c6118892eab87317f1cf38b3de566e25a04854a Mon Sep 17 00:00:00 2001
From: Bojan Marjanovic <bmarjanovic@gitlab.com>
Date: Fri, 21 Apr 2023 09:12:39 +0000
Subject: [PATCH] Introduce circuit breaker for OpenAI client

Adds a circuitbox gem, and wraps around OpenAI client calls

Changelog: added
---
 Gemfile                                       |  1 +
 Gemfile.checksum                              |  1 +
 Gemfile.lock                                  |  2 +
 config/circuitbox.rb                          |  6 ++
 config/feature_flags/ops/circuit_breaker.yml  |  8 +++
 ee/lib/gitlab/circuit_breaker/notifier.rb     | 24 ++++++++
 ee/lib/gitlab/llm/concerns/circuit_breaker.rb | 36 +++++++++++
 .../gitlab/llm/open_ai/exponential_backoff.rb | 20 ++++++-
 .../gitlab/circuit_breaker/notifier_spec.rb   | 27 +++++++++
 .../llm/concerns/circuit_breaker_spec.rb      | 60 +++++++++++++++++++
 ee/spec/lib/gitlab/llm/open_ai/client_spec.rb |  1 +
 .../llm/open_ai/exponential_backoff_spec.rb   | 31 +++++++++-
 .../llm/circuit_breaker_shared_examples.rb    | 10 ++++
 13 files changed, 222 insertions(+), 5 deletions(-)
 create mode 100644 config/circuitbox.rb
 create mode 100644 config/feature_flags/ops/circuit_breaker.yml
 create mode 100644 ee/lib/gitlab/circuit_breaker/notifier.rb
 create mode 100644 ee/lib/gitlab/llm/concerns/circuit_breaker.rb
 create mode 100644 ee/spec/lib/gitlab/circuit_breaker/notifier_spec.rb
 create mode 100644 ee/spec/lib/gitlab/llm/concerns/circuit_breaker_spec.rb
 create mode 100644 ee/spec/support/shared_examples/lib/gitlab/llm/circuit_breaker_shared_examples.rb

diff --git a/Gemfile b/Gemfile
index f16ab32d11fb0..df86744eee15e 100644
--- a/Gemfile
+++ b/Gemfile
@@ -287,6 +287,7 @@ gem 'kubeclient', '~> 4.11.0'
 
 # AI
 gem 'ruby-openai', '~> 3.7'
+gem 'circuitbox', '2.0.0.pre5'
 
 # Sanitize user input
 gem 'sanitize', '~> 6.0'
diff --git a/Gemfile.checksum b/Gemfile.checksum
index 73fb708a38c0f..1458f2f442e2b 100644
--- a/Gemfile.checksum
+++ b/Gemfile.checksum
@@ -78,6 +78,7 @@
 {"name":"chef-utils","version":"16.10.17","platform":"ruby","checksum":"a74253da6aab8ff92c955549536bdecbc4d1ce8032c8201576f2a8ef4e8ed7b3"},
 {"name":"childprocess","version":"3.0.0","platform":"ruby","checksum":"4579a87cdc962de252eebf1482a4185fad383ae7dbe29a746ba2be8e261280c5"},
 {"name":"chunky_png","version":"1.3.5","platform":"ruby","checksum":"b6ab1011b2e79bcc973c92deee4110d071d5cd59ed950efcd0aba49a5f57c06d"},
+{"name":"circuitbox","version":"2.0.0.pre5","platform":"ruby","checksum":"033d4820a9b688f0539630b81ae0d707ce8e6ccd34756de31a79063b190ffffc"},
 {"name":"citrus","version":"3.0.2","platform":"ruby","checksum":"4ec2412fc389ad186735f4baee1460f7900a8e130ffe3f216b30d4f9c684f650"},
 {"name":"claide","version":"1.1.0","platform":"ruby","checksum":"6d3c5c089dde904d96aa30e73306d0d4bd444b1accb9b3125ce14a3c0183f82e"},
 {"name":"claide-plugins","version":"0.9.2","platform":"ruby","checksum":"c7ea78bc067ab23bce8515497cdcdcb8f01c86dadfbe13c44644e382922c1c2e"},
diff --git a/Gemfile.lock b/Gemfile.lock
index 87bae618f10d3..2eeadd20d999e 100644
--- a/Gemfile.lock
+++ b/Gemfile.lock
@@ -305,6 +305,7 @@ GEM
     chef-utils (16.10.17)
     childprocess (3.0.0)
     chunky_png (1.3.5)
+    circuitbox (2.0.0.pre5)
     citrus (3.0.2)
     claide (1.1.0)
     claide-plugins (0.9.2)
@@ -1695,6 +1696,7 @@ DEPENDENCIES
   capybara-screenshot (~> 1.0.26)
   carrierwave (~> 1.3)
   charlock_holmes (~> 0.7.7)
+  circuitbox (= 2.0.0.pre5)
   cloud_profiler_agent (~> 0.0.0)!
   commonmarker (~> 0.23.6)
   concurrent-ruby (~> 1.1)
diff --git a/config/circuitbox.rb b/config/circuitbox.rb
new file mode 100644
index 0000000000000..edce0aefdb855
--- /dev/null
+++ b/config/circuitbox.rb
@@ -0,0 +1,6 @@
+# frozen_string_literal: true
+
+Circuitbox.configure do |config|
+  config.default_circuit_store = Circuitbox::MemoryStore.new
+  config.default_notifier = GitLab::CircuitBreaker::Notifier.new
+end
diff --git a/config/feature_flags/ops/circuit_breaker.yml b/config/feature_flags/ops/circuit_breaker.yml
new file mode 100644
index 0000000000000..a8de0a9252922
--- /dev/null
+++ b/config/feature_flags/ops/circuit_breaker.yml
@@ -0,0 +1,8 @@
+---
+name: circuit_breaker
+introduced_by_url: https://gitlab.com/gitlab-org/gitlab/-/merge_requests/117993
+rollout_issue_url:
+milestone: '16.0'
+type: ops
+group: group::ai-enablement
+default_enabled: false
diff --git a/ee/lib/gitlab/circuit_breaker/notifier.rb b/ee/lib/gitlab/circuit_breaker/notifier.rb
new file mode 100644
index 0000000000000..96134098197e8
--- /dev/null
+++ b/ee/lib/gitlab/circuit_breaker/notifier.rb
@@ -0,0 +1,24 @@
+# frozen_string_literal: true
+
+module Gitlab
+  module CircuitBreaker
+    class Notifier
+      CircuitBreakerError = Class.new(RuntimeError)
+
+      def notify(service_name, event)
+        exception = CircuitBreakerError.new("Service #{service_name}: #{event}")
+        exception.set_backtrace(Gitlab::BacktraceCleaner.clean_backtrace(caller))
+
+        Gitlab::ErrorTracking.track_exception(exception)
+      end
+
+      def notify_warning(service_name, message)
+        # no-op
+      end
+
+      def notify_run(service_name, &block)
+        # no-op
+      end
+    end
+  end
+end
diff --git a/ee/lib/gitlab/llm/concerns/circuit_breaker.rb b/ee/lib/gitlab/llm/concerns/circuit_breaker.rb
new file mode 100644
index 0000000000000..4272609bbb26c
--- /dev/null
+++ b/ee/lib/gitlab/llm/concerns/circuit_breaker.rb
@@ -0,0 +1,36 @@
+# frozen_string_literal: true
+
+module Gitlab
+  module Llm
+    module Concerns
+      module CircuitBreaker
+        extend ActiveSupport::Concern
+
+        InternalServerError = Class.new(StandardError)
+
+        ERROR_THRESHOLD = 50
+        VOLUME_THRESHOLD = 10
+
+        included do
+          def circuit
+            @circuit ||= Circuitbox.circuit(service_name, {
+              exceptions: [InternalServerError],
+              error_threshold: ERROR_THRESHOLD,
+              volume_threshold: VOLUME_THRESHOLD
+            })
+          end
+        end
+
+        def run_with_circuit(&block)
+          circuit.run(exception: false, &block)
+        end
+
+        private
+
+        def service_name
+          raise NotImplementedError
+        end
+      end
+    end
+  end
+end
diff --git a/ee/lib/gitlab/llm/open_ai/exponential_backoff.rb b/ee/lib/gitlab/llm/open_ai/exponential_backoff.rb
index d0a0497971033..4d03abda5ea91 100644
--- a/ee/lib/gitlab/llm/open_ai/exponential_backoff.rb
+++ b/ee/lib/gitlab/llm/open_ai/exponential_backoff.rb
@@ -4,9 +4,12 @@ module Gitlab
   module Llm
     module OpenAi
       module ExponentialBackoff
+        include ::Gitlab::Llm::Concerns::CircuitBreaker
+
         INITIAL_DELAY = 1.second
         EXPONENTIAL_BASE = 2
         MAX_RETRIES = 3
+
         RateLimitError = Class.new(StandardError)
 
         def self.included(base)
@@ -18,8 +21,16 @@ def retry_methods_with_exponential_backoff(*method_names)
             original_method = instance_method(method_name)
 
             define_method(method_name) do |*args, **kwargs|
-              retry_with_exponential_backoff do
-                original_method.bind_call(self, *args, **kwargs)
+              if Feature.enabled?(:circuit_breaker, type: :ops)
+                run_with_circuit do
+                  retry_with_exponential_backoff do
+                    original_method.bind_call(self, *args, **kwargs)
+                  end
+                end
+              else
+                retry_with_exponential_backoff do
+                  original_method.bind_call(self, *args, **kwargs)
+                end
               end
             end
           end
@@ -35,6 +46,7 @@ def retry_with_exponential_backoff
             response = yield
 
             return if response.nil?
+            raise InternalServerError if response.server_error? && Feature.enabled?(:circuit_breaker, type: :ops)
             return response unless response.too_many_requests?
 
             retries += 1
@@ -45,6 +57,10 @@ def retry_with_exponential_backoff
             next
           end
         end
+
+        def service_name
+          'open_ai'
+        end
       end
     end
   end
diff --git a/ee/spec/lib/gitlab/circuit_breaker/notifier_spec.rb b/ee/spec/lib/gitlab/circuit_breaker/notifier_spec.rb
new file mode 100644
index 0000000000000..1bf243a45ea5a
--- /dev/null
+++ b/ee/spec/lib/gitlab/circuit_breaker/notifier_spec.rb
@@ -0,0 +1,27 @@
+# frozen_string_literal: true
+
+require 'spec_helper'
+
+RSpec.describe Gitlab::CircuitBreaker::Notifier, feature_category: :shared do
+  subject { described_class.new }
+
+  describe '#notify' do
+    it 'sends an exception to Gitlab::ErrorTracking' do
+      expect(Gitlab::ErrorTracking).to receive(:track_exception)
+
+      subject.notify('test_service', 'test_event')
+    end
+  end
+
+  describe '#notify_warning' do
+    it do
+      expect { subject.notify_warning('test_service', 'test_message') }.not_to raise_error
+    end
+  end
+
+  describe '#notify_run' do
+    it do
+      expect { subject.notify_run('test_service') { puts 'test block' } }.not_to raise_error
+    end
+  end
+end
diff --git a/ee/spec/lib/gitlab/llm/concerns/circuit_breaker_spec.rb b/ee/spec/lib/gitlab/llm/concerns/circuit_breaker_spec.rb
new file mode 100644
index 0000000000000..8197d165c272d
--- /dev/null
+++ b/ee/spec/lib/gitlab/llm/concerns/circuit_breaker_spec.rb
@@ -0,0 +1,60 @@
+# frozen_string_literal: true
+
+require 'spec_helper'
+
+RSpec.describe Gitlab::Llm::Concerns::CircuitBreaker, feature_category: :shared do
+  let(:dummy_class) do
+    Class.new do
+      include ::Gitlab::Llm::Concerns::CircuitBreaker
+
+      def dummy_method
+        run_with_circuit do
+          raise Gitlab::Llm::Concerns::CircuitBreaker::InternalServerError
+        end
+      end
+
+      private
+
+      def service_name
+        'dummy_service'
+      end
+    end
+  end
+
+  subject { dummy_class.new }
+
+  describe '#circuit' do
+    it 'returns nil value' do
+      expect(Circuitbox).to receive(:circuit).with('dummy_service', anything).and_call_original
+      expect(subject.dummy_method).to be_nil
+    end
+
+    it 'does not raise an error' do
+      expect(Circuitbox).to receive(:circuit).with('dummy_service', anything).and_call_original
+      expect { subject.dummy_method }.not_to raise_error
+    end
+  end
+
+  describe '#run_with_circuit' do
+    let(:circuit) { double('circuit') } # rubocop: disable RSpec/VerifiedDoubles
+    let(:block) { proc {} }
+
+    before do
+      allow(subject).to receive(:circuit).and_return(circuit)
+    end
+
+    it 'runs the code block within the Circuitbox circuit' do
+      expect(circuit).to receive(:run).with(exception: false, &block)
+      subject.run_with_circuit(&block)
+    end
+  end
+
+  describe '#service_name' do
+    it 'raises NotImplementedError' do
+      instance = Object.new
+      instance.extend(described_class)
+
+      expect { instance.send(:service_name) }.to raise_error(NotImplementedError)
+    end
+  end
+end
diff --git a/ee/spec/lib/gitlab/llm/open_ai/client_spec.rb b/ee/spec/lib/gitlab/llm/open_ai/client_spec.rb
index 4d113b416194f..881006fbfcf6a 100644
--- a/ee/spec/lib/gitlab/llm/open_ai/client_spec.rb
+++ b/ee/spec/lib/gitlab/llm/open_ai/client_spec.rb
@@ -31,6 +31,7 @@
   end
 
   before do
+    allow(response_double).to receive(:server_error?).and_return(false)
     allow(response_double).to receive(:too_many_requests?).and_return(false)
     allow_next_instance_of(::OpenAI::Client) do |open_ai_client|
       allow(open_ai_client).to receive(method).with(hash_including(expected_options)).and_return(response_double)
diff --git a/ee/spec/lib/gitlab/llm/open_ai/exponential_backoff_spec.rb b/ee/spec/lib/gitlab/llm/open_ai/exponential_backoff_spec.rb
index be164d0ed477e..454a5e7821175 100644
--- a/ee/spec/lib/gitlab/llm/open_ai/exponential_backoff_spec.rb
+++ b/ee/spec/lib/gitlab/llm/open_ai/exponential_backoff_spec.rb
@@ -4,15 +4,21 @@
 
 RSpec.describe Gitlab::Llm::OpenAi::ExponentialBackoff, feature_category: :no_category do # rubocop: disable RSpec/InvalidFeatureCategory
   let(:success) do
-    instance_double(HTTParty::Response, code: 200, success?: true, parsed_response: {}, too_many_requests?: false)
+    instance_double(HTTParty::Response,
+      code: 200, success?: true, parsed_response: {}, server_error?: false, too_many_requests?: false
+    )
   end
 
   let(:too_many_requests_error) do
-    instance_double(HTTParty::Response, code: 429, success?: false, parsed_response: {}, too_many_requests?: true)
+    instance_double(HTTParty::Response,
+      code: 429, success?: false, parsed_response: {}, server_error?: false, too_many_requests?: true
+    )
   end
 
   let(:auth_error) do
-    instance_double(HTTParty::Response, code: 401, success?: false, parsed_response: {}, too_many_requests?: false)
+    instance_double(HTTParty::Response,
+      code: 401, success?: false, parsed_response: {}, server_error?: false, too_many_requests?: false
+    )
   end
 
   let(:response_caller) { -> { success } }
@@ -30,6 +36,25 @@ def dummy_method(response_caller)
 
   subject { dummy_class.new.dummy_method(response_caller) }
 
+  it_behaves_like 'has circuit breaker' do
+    let(:service) { dummy_class.new }
+    let(:subject) { service.dummy_method(response_caller) }
+  end
+
+  context 'with feature flag disabled' do
+    before do
+      stub_feature_flags(circuit_breaker: false)
+    end
+
+    it 'runs the code block outside of the circuit breaker' do
+      service = dummy_class.new
+      subject = service.dummy_method(response_caller)
+
+      expect(service).not_to receive(:run_with_circuit)
+      subject
+    end
+  end
+
   describe '.wrap_method' do
     it 'wraps the instance method and retries with exponential backoff' do
       service = dummy_class.new
diff --git a/ee/spec/support/shared_examples/lib/gitlab/llm/circuit_breaker_shared_examples.rb b/ee/spec/support/shared_examples/lib/gitlab/llm/circuit_breaker_shared_examples.rb
new file mode 100644
index 0000000000000..f4fe78106b292
--- /dev/null
+++ b/ee/spec/support/shared_examples/lib/gitlab/llm/circuit_breaker_shared_examples.rb
@@ -0,0 +1,10 @@
+# frozen_string_literal: true
+
+RSpec.shared_examples 'has circuit breaker' do
+  describe '#call_external_service' do
+    it 'runs the code block within the circuit breaker' do
+      expect(service).to receive(:run_with_circuit)
+      subject
+    end
+  end
+end
-- 
GitLab