diff --git a/app/models/ai/service_access_token.rb b/app/models/ai/service_access_token.rb
index 863bdfc7899cc6592138fb0b5563b22b9e13606d..b8a2a271976d0da7af8f7a348cd6f0cccfe9754a 100644
--- a/app/models/ai/service_access_token.rb
+++ b/app/models/ai/service_access_token.rb
@@ -5,6 +5,7 @@ class ServiceAccessToken < ApplicationRecord
     self.table_name = 'service_access_tokens'
 
     scope :expired, -> { where('expires_at < :now', now: Time.current) }
+    scope :active, -> { where('expires_at > :now', now: Time.current) } # expiry strictly in the future
     scope :for_category, ->(category) { where(category: category) }
 
     attr_encrypted :token,
diff --git a/config/feature_flags/development/self_managed_code_suggestions_completion_api.yml b/config/feature_flags/development/self_managed_code_suggestions_completion_api.yml
new file mode 100644
index 0000000000000000000000000000000000000000..7760be2370f61c8d30f57ba4f58660e52fefa24e
--- /dev/null
+++ b/config/feature_flags/development/self_managed_code_suggestions_completion_api.yml
@@ -0,0 +1,8 @@
+---
+name: self_managed_code_suggestions_completion_api # gates the completions API when Gitlab.org_or_com? is false
+introduced_by_url: https://gitlab.com/gitlab-org/gitlab/-/merge_requests/125563
+rollout_issue_url: https://gitlab.com/gitlab-org/gitlab/-/issues/418795
+milestone: '16.3'
+type: development
+group: group::application performance
+default_enabled: false
diff --git a/ee/lib/api/code_suggestions.rb b/ee/lib/api/code_suggestions.rb
index ba532d30a30c006c07f05081cf0f3c94363f971c..6e165b64471bddb7d38f0eccee8102d5624860a1 100644
--- a/ee/lib/api/code_suggestions.rb
+++ b/ee/lib/api/code_suggestions.rb
@@ -85,7 +85,16 @@ def completions_endpoint
 
       resources :completions do
         post do
-          not_found! unless ::Feature.enabled?(:code_suggestions_completion_api, current_user)
+          if Gitlab.org_or_com? # this branch evaluates its flag per user
+            not_found! unless ::Feature.enabled?(:code_suggestions_completion_api, current_user)
+          else # this branch evaluates its flag without an actor
+            not_found! unless ::Feature.enabled?(:self_managed_code_suggestions_completion_api)
+
+            code_suggestions_token = ::Ai::ServiceAccessToken.code_suggestions.active.last
+            unauthorized! if code_suggestions_token.nil? # no token matched .code_suggestions.active
+
+            headers['X-Gitlab-Oidc-Token'] = code_suggestions_token.token # replaces any value already in headers
+          end
 
           response = ::Gitlab::HTTP.post(completions_endpoint, {
             body: params.except(:private_token).to_json,
diff --git a/ee/spec/requests/api/code_suggestions_spec.rb b/ee/spec/requests/api/code_suggestions_spec.rb
index dbdd579882d8de7c3ef0cc77ab5c03c4a445ebd5..645ced82d45d083fb7f4fb8c9b55195ee7688983 100644
--- a/ee/spec/requests/api/code_suggestions_spec.rb
+++ b/ee/spec/requests/api/code_suggestions_spec.rb
@@ -163,14 +163,8 @@
   end
 
   describe 'POST /code_suggestions/completions' do
+    let_it_be(:token) { 'JWTTOKEN' }
     let(:access_code_suggestions) { true }
-    let(:headers) do
-      {
-        'X-Gitlab-Authentication-Type' => 'oidc',
-        'X-Gitlab-Oidc-Token' => "JWTTOKEN",
-        'Content-Type' => 'application/json'
-      }
-    end
 
     let(:body) do
       {
@@ -193,56 +187,113 @@
       allow(Ability).to receive(:allowed?).and_call_original
       allow(Ability).to receive(:allowed?).with(current_user, :access_code_suggestions, :global)
                                           .and_return(access_code_suggestions)
-      allow(Gitlab).to receive(:org_or_com?).and_return(true)
     end
 
-    context 'when user is not logged in' do
-      let(:current_user) { nil }
+    shared_examples 'code completions endpoint' do
+      context 'when user is not logged in' do
+        let(:current_user) { nil }
 
-      include_examples 'an unauthorized response'
-    end
+        include_examples 'an unauthorized response'
+      end
 
-    context 'when user does not have access to code suggestions' do
-      let(:access_code_suggestions) { false }
+      context 'when user does not have access to code suggestions' do
+        let(:access_code_suggestions) { false }
 
-      include_examples 'an unauthorized response'
-    end
+        include_examples 'an unauthorized response'
+      end
 
-    context 'when user is logged in' do
-      let(:current_user) { create(:user) }
+      context 'when user is logged in' do
+        let(:current_user) { create(:user) }
 
-      it 'proxies request to code suggestions service' do
-        expect(Gitlab::HTTP).to receive(:post).with(
-          "https://codesuggestions.gitlab.com/v2/completions",
-          {
-            body: body.to_json,
-            headers: {
-              'X-Gitlab-Authentication-Type' => 'oidc',
-              'Authorization' => 'Bearer JWTTOKEN',
-              'Content-Type' => 'application/json'
-            },
-            open_timeout: 3,
-            read_timeout: 5,
-            write_timeout: 5
-          }
-        )
+        it 'proxies request to code suggestions service with the auth token from the DB' do
+          expect(Gitlab::HTTP).to receive(:post).with(
+            "https://codesuggestions.gitlab.com/v2/completions",
+            {
+              body: body.to_json,
+              headers: {
+                'X-Gitlab-Authentication-Type' => 'oidc',
+                'Authorization' => "Bearer #{token}",
+                'Content-Type' => 'application/json'
+              },
+              open_timeout: 3,
+              read_timeout: 5,
+              write_timeout: 5
+            }
+          )
 
-        post_api
-      end
+          post_api
+        end
 
-      context 'when overriding service base URL' do
-        before do
-          stub_env('CODE_SUGGESTIONS_BASE_URL', 'http://test.com')
+        context 'when overriding service base URL' do
+          before do
+            stub_env('CODE_SUGGESTIONS_BASE_URL', 'http://test.com')
+          end
+
+          it 'sends requests to this URL instead' do
+            expect(Gitlab::HTTP).to receive(:post).with('http://test.com/v2/completions', an_instance_of(Hash))
+
+            post_api
+          end
         end
 
-        it 'sends requests to this URL instead' do
-          expect(Gitlab::HTTP).to receive(:post).with('http://test.com/v2/completions', an_instance_of(Hash))
+        context 'with telemetry headers' do
+          let(:headers) do
+            {
+              'X-Gitlab-Authentication-Type' => 'oidc',
+              'X-Gitlab-Oidc-Token' => token,
+              'Content-Type' => 'application/json',
+              'X-GitLab-CS-Accepts' => 'accepts', # sent as X-GitLab-CS-*, expected below as X-Gitlab-Cs-*
+              'X-GitLab-CS-Requests' => "requests",
+              'X-GitLab-CS-Errors' => 'errors',
+              'X-GitLab-CS-Custom' => 'helloworld',
+              'X-GitLab-NO-Ignore' => 'ignoreme' # absent from the expected proxied headers below
+            }
+          end
+
+          it 'proxies appropriate headers to code suggestions service' do
+            expect(Gitlab::HTTP).to receive(:post).with(
+              "https://codesuggestions.gitlab.com/v2/completions",
+              {
+                body: body.to_json,
+                headers: {
+                  'X-Gitlab-Authentication-Type' => 'oidc',
+                  'Authorization' => "Bearer #{token}", # expected in place of X-Gitlab-Oidc-Token
+                  'Content-Type' => 'application/json',
+                  'X-Gitlab-Cs-Accepts' => 'accepts',
+                  'X-Gitlab-Cs-Requests' => "requests",
+                  'X-Gitlab-Cs-Errors' => 'errors',
+                  'X-Gitlab-Cs-Custom' => 'helloworld'
+                },
+                open_timeout: 3,
+                read_timeout: 5,
+                write_timeout: 5
+              }
+            )
 
-          post_api
+            post_api
+          end
         end
       end
+    end
+
+    context 'when the instance is Gitlab.org_or_com' do
+      before do
+        allow(Gitlab).to receive(:org_or_com?).and_return(true)
+      end
+
+      let(:headers) do
+        {
+          'X-Gitlab-Authentication-Type' => 'oidc',
+          'X-Gitlab-Oidc-Token' => token,
+          'Content-Type' => 'application/json'
+        }
+      end
+
+      it_behaves_like 'code completions endpoint'
 
       context 'when feature flag is disabled' do
+        let(:current_user) { create(:user) }
+
         before do
           stub_feature_flags(code_suggestions_completion_api: false)
         end
@@ -254,42 +305,49 @@
           end
         end
       end
+    end
+
+    context 'when the instance is Gitlab self-managed' do
+      before do
+        allow(Gitlab).to receive(:org_or_com?).and_return(false)
+      end
+
+      let(:headers) do
+        {
+          'X-Gitlab-Authentication-Type' => 'oidc',
+          'Content-Type' => 'application/json'
+        }
+      end
+
+      let_it_be(:service_access_token) { create(:service_access_token, :code_suggestions, :active, token: token) }
 
-      context 'with telemetry headers' do
-        let(:headers) do
-          {
-            'X-Gitlab-Authentication-Type' => 'oidc',
-            'X-Gitlab-Oidc-Token' => "JWTTOKEN",
-            'Content-Type' => 'application/json',
-            'X-GitLab-CS-Accepts' => 'accepts',
-            'X-GitLab-CS-Requests' => "requests",
-            'X-GitLab-CS-Errors' => 'errors',
-            'X-GitLab-CS-Custom' => 'helloworld',
-            'X-GitLab-NO-Ignore' => 'ignoreme'
-          }
+      it_behaves_like 'code completions endpoint'
+
+      context 'when there is no active code suggestions token' do
+        before do
+          create(:service_access_token, :code_suggestions, :expired, token: token)
         end
 
-        it 'proxies appropriate headers to code suggestions service' do
-          expect(Gitlab::HTTP).to receive(:post).with(
-            "https://codesuggestions.gitlab.com/v2/completions",
-            {
-              body: body.to_json,
-              headers: {
-                'X-Gitlab-Authentication-Type' => 'oidc',
-                'Authorization' => 'Bearer JWTTOKEN',
-                'Content-Type' => 'application/json',
-                'X-Gitlab-Cs-Accepts' => 'accepts',
-                'X-Gitlab-Cs-Requests' => "requests",
-                'X-Gitlab-Cs-Errors' => 'errors',
-                'X-Gitlab-Cs-Custom' => 'helloworld'
-              },
-              open_timeout: 3,
-              read_timeout: 5,
-              write_timeout: 5
-            }
-          )
+        include_examples 'a response', 'unauthorized' do
+          let(:result) { :unauthorized }
+          let(:body) do
+            { "message" => "401 Unauthorized" }
+          end
+        end
+      end
 
-          post_api
+      context 'when feature flag is disabled' do
+        let(:current_user) { create(:user) }
+
+        before do
+          stub_feature_flags(self_managed_code_suggestions_completion_api: false)
+        end
+
+        include_examples 'a response', 'not found' do
+          let(:result) { :not_found }
+          let(:body) do
+            { "message" => "404 Not Found" }
+          end
         end
       end
     end
diff --git a/spec/models/ai/service_access_token_spec.rb b/spec/models/ai/service_access_token_spec.rb
index 12ed24f3bd6a98a80c3432ebd9b36fed635c6dc6..d979db4b3d61628dd1b0016c1916ed041b8ae472 100644
--- a/spec/models/ai/service_access_token_spec.rb
+++ b/spec/models/ai/service_access_token_spec.rb
@@ -12,6 +12,15 @@
     end
   end
 
+  describe '.active', :freeze_time do
+    let_it_be(:expired_token) { create(:service_access_token, :code_suggestions, :expired) }
+    let_it_be(:active_token) {  create(:service_access_token, :code_suggestions, :active) }
+
+    it 'selects all active tokens' do
+      expect(described_class.active).to match_array([active_token])
+    end
+  end
+
   # There is currently only one category, please expand this test when a new category is added.
   describe '.for_category' do
     let(:code_suggestions_token) { create(:service_access_token, :code_suggestions) }