diff --git a/.rubocop_todo/style/inline_disable_annotation.yml b/.rubocop_todo/style/inline_disable_annotation.yml index 937e621a064b9a88cd14b3fe0fb0fd83fce776f0..16a9d03d19f66e1971f0a8a0ef1b41620c517cf6 100644 --- a/.rubocop_todo/style/inline_disable_annotation.yml +++ b/.rubocop_todo/style/inline_disable_annotation.yml @@ -1864,7 +1864,6 @@ Style/InlineDisableAnnotation: - 'ee/lib/gitlab/llm/chain/tools/identifier.rb' - 'ee/lib/gitlab/llm/chain/tools/summarize_comments/executor.rb' - 'ee/lib/gitlab/llm/chat_storage.rb' - - 'ee/lib/gitlab/llm/open_ai/client.rb' - 'ee/lib/gitlab/llm/templates/explain_vulnerability.rb' - 'ee/lib/gitlab/llm/vertex_ai/completions/summarize_submitted_review.rb' - 'ee/lib/gitlab/middleware/ip_restrictor.rb' diff --git a/doc/architecture/blueprints/ai_gateway/index.md b/doc/architecture/blueprints/ai_gateway/index.md index c883a6746942ea0919fce9e1336e948f5817d92e..3af9139100dfe8e5764b628364f556ed5789b5d9 100644 --- a/doc/architecture/blueprints/ai_gateway/index.md +++ b/doc/architecture/blueprints/ai_gateway/index.md @@ -371,7 +371,6 @@ following providers: - [Anthropic](https://gitlab.com/gitlab-org/gitlab/blob/4344729240496a5018e19a82030d6d4b227e9c79/ee/lib/gitlab/llm/anthropic/client.rb#L6) - [Vertex](https://gitlab.com/gitlab-org/gitlab/blob/4344729240496a5018e19a82030d6d4b227e9c79/ee/lib/gitlab/llm/vertex_ai/client.rb#L6) -- [OpenAI](https://gitlab.com/gitlab-org/gitlab/blob/4344729240496a5018e19a82030d6d4b227e9c79/ee/lib/gitlab/llm/open_ai/client.rb#L8) To make these features available to self-managed instances, we should provide endpoints for each of these that GitLab.com, self-managed or diff --git a/ee/lib/gitlab/llm/open_ai/client.rb b/ee/lib/gitlab/llm/open_ai/client.rb deleted file mode 100644 index bff9d5d40b76cb5137e5203f8ed90d259f5bb303..0000000000000000000000000000000000000000 --- a/ee/lib/gitlab/llm/open_ai/client.rb +++ /dev/null @@ -1,213 +0,0 @@ -# frozen_string_literal: true - -require 'openai' - -module Gitlab - module Llm - module OpenAi - class Client - include ::Gitlab::Llm::Concerns::ExponentialBackoff - include ::Gitlab::Llm::Concerns::EventTracking - - InputModerationError = Class.new(StandardError) - OutputModerationError = Class.new(StandardError) - - def initialize(user, request_timeout: nil, tracking_context: {}) - @user = user - @request_timeout = request_timeout - @tracking_context = tracking_context - @logger = Gitlab::Llm::Logger.build - end - - def chat(content:, moderated: nil, **options) - request( - endpoint: :chat, - moderated: warn_if_moderated_unset(moderated, default: true), - parameters: Options.new.chat(content: content, **options) - ) - end - - # messages: an array with `role` and `content` a keys. - # the value of `role` should be one of GPT_ROLES - # this needed to pass back conversation history - def messages_chat(messages:, moderated: nil, **options) - request( - endpoint: :chat, - moderated: warn_if_moderated_unset(moderated, default: true), - parameters: Options.new.messages_chat(messages: messages, **options) - ) - end - - def completions(prompt:, moderated: nil, **options) - request( - endpoint: :completions, - moderated: warn_if_moderated_unset(moderated, default: true), - parameters: Options.new.completions(prompt: prompt, **options) - ) - end - - def edits(input:, instruction:, moderated: nil, **options) - request( - endpoint: :edits, - moderated: warn_if_moderated_unset(moderated, default: true), - parameters: Options.new.edits(input: input, instruction: instruction, **options) - ) - end - - def embeddings(input:, moderated: nil, **options) - request( - endpoint: :embeddings, - moderated: warn_if_moderated_unset(moderated, default: false), - parameters: Options.new.embeddings(input: input, **options) - ) - end - - def moderations(input:, **options) - request( - endpoint: :moderations, - moderated: false, - parameters: Options.new.moderations(input: input, **options) - ) - end - - private - - attr_reader :user, :request_timeout, :logger, :tracking_context - - def client - @client ||= OpenAI::Client.new(access_token: access_token, request_timeout: request_timeout) - end - - def enabled? - access_token.present? && Feature.enabled?(:ai_global_switch, type: :ops) - end - - def access_token - @token ||= ::Gitlab::CurrentSettings.openai_api_key - end - - def warn_if_moderated_unset(moderated, default:) - return moderated unless moderated.nil? - - msg = "The `moderated` argument is not set, and defaults to `#{default}`. " \ - "Please update this code to explicitly pass this argument" - # Reject stack entries related to this class to reach client code - regexp = /#{__FILE__}|exponential_backoff.rb|circuit_breaker.rb/ - stacktrace = caller_locations.reject { |loc| loc.to_s =~ regexp } - ActiveSupport::Deprecation.warn(msg, stacktrace) - - default - end - - # @param [Symbol] endpoint - OpenAI endpoint to call - # @param [Boolean, Symbol] moderated - Whether to moderate the input and/or output. - # `true` - moderate both, - # `false` - moderation none, - # `:input` - moderate only input, - # `:output` - moderate only output - # @param [Hash] options - Options to pass to the OpenAI client - def request(endpoint:, moderated:, **options) - return unless enabled? - - logger.info(message: "Performing request to OpenAI", endpoint: endpoint) - - moderate!(:input, moderation_input(endpoint, options)) if should_moderate?(:input, moderated) - - response = retry_with_exponential_backoff do - client.public_send(endpoint, **options) # rubocop:disable GitlabSecurity/PublicSend - end - - logger.debug(message: "Received response from OpenAI", response: response) - - track_cost(endpoint, response.parsed_response&.dig('usage')) - - if should_moderate?(:output, moderated) - moderate!(:output, moderation_output(endpoint, response.parsed_response)) - end - - response - end - - def track_cost(endpoint, usage_data) - return unless usage_data - - track_cost_metric("#{endpoint}/prompt", usage_data['prompt_tokens']) - track_cost_metric("#{endpoint}/completion", usage_data['completion_tokens']) - - track_prompt_size(usage_data['prompt_tokens']) - track_response_size(usage_data['completion_tokens']) - end - - def track_cost_metric(context, amount) - return unless amount - - cost_metric.increment( - { - vendor: 'open_ai', - item: context, - unit: 'tokens', - feature_category: ::Gitlab::ApplicationContext.current_context_attribute(:feature_category) - }, - amount - ) - end - - def cost_metric - @cost_metric ||= Gitlab::Metrics.counter( - :gitlab_cloud_cost_spend_entry_total, - 'Number of units spent per vendor entry' - ) - end - - def should_moderate?(type, moderation_value) - return false if moderation_value == false - return true if moderation_value == true - return true if type == :input && moderation_value == :input - return true if type == :output && moderation_value == :output - - false - end - - # @param [Symbol] type - Type of text to moderate, input or output - # @param [String] text - Text to moderate - def moderate!(type, text) - return unless text.present? - - flagged = moderations(input: text) - .parsed_response - &.dig('results') - &.any? { |r| r['flagged'] } - - return unless flagged - - error_type = type == :input ? InputModerationError : OutputModerationError - error_message = "Provided #{type} violates OpenAI's Content Policy" - - raise(error_type, error_message) - end - - # rubocop:disable CodeReuse/ActiveRecord - def moderation_input(endpoint, options) - case endpoint - when :chat - options.dig(:parameters, :messages).pluck(:content) - when :completions - options.dig(:parameters, :prompt) - when :edits, :embeddings - options.dig(:parameters, :input) - end - end - - def moderation_output(endpoint, parsed_response) - case endpoint - when :chat - parsed_response&.dig('choices')&.pluck('message')&.pluck('content')&.map { |str| str.delete('\"') } - when :edits, :completions - parsed_response&.dig('choices')&.pluck('text') - end - end - # rubocop:enable CodeReuse/ActiveRecord - end - end - end -end diff --git a/ee/lib/gitlab/metrics/llm.rb b/ee/lib/gitlab/metrics/llm.rb index cef559841c2ef99094cccb811bd6a6ff1ccd4611..8372d918431046f8bf2e11bdfe495ff9a4f870e0 100644 --- a/ee/lib/gitlab/metrics/llm.rb +++ b/ee/lib/gitlab/metrics/llm.rb @@ -7,8 +7,7 @@ class << self CLIENT_NAMES = { 'Gitlab::Llm::AiGateway::Client' => :ai_gateway, 'Gitlab::Llm::VertexAi::Client' => :vertex_ai, - 'Gitlab::Llm::Anthropic::Client' => :anthropic, - 'Gitlab::Llm::OpenAi::Client' => :open_ai + 'Gitlab::Llm::Anthropic::Client' => :anthropic }.freeze def initialize_slis! diff --git a/ee/spec/lib/gitlab/llm/open_ai/client_spec.rb b/ee/spec/lib/gitlab/llm/open_ai/client_spec.rb deleted file mode 100644 index 34cc7dc530c640631ba100d4c62c18d676730546..0000000000000000000000000000000000000000 --- a/ee/spec/lib/gitlab/llm/open_ai/client_spec.rb +++ /dev/null @@ -1,500 +0,0 @@ -# frozen_string_literal: true - -require 'spec_helper' - -RSpec.describe Gitlab::Llm::OpenAi::Client, feature_category: :ai_abstraction_layer do - let_it_be(:user) { create(:user) } - - let(:access_token) { 'secret' } - let(:default_options) { {} } - let(:expected_options) { {} } - let(:options) { {} } - let(:response) { instance_double(Net::HTTPResponse, body: example_response.to_json) } - let(:tracking_context) { { request_id: 'uuid', action: 'chat' } } - let(:example_response) do - { - 'model' => 'model', - 'choices' => [ - { - 'message' => { - 'content' => 'foo' - } - }, - { - 'message' => { - 'content' => 'bar' - } - } - ], - 'usage' => { - 'prompt_tokens' => 1, - 'completion_tokens' => 2, - 'total_tokens' => 3 - } - } - end - - let(:moderation_response) do - { 'results' => [{ 'flagged' => false }] } - end - - let(:response_double) do - instance_double(HTTParty::Response, code: 200, success?: true, - response: response, parsed_response: example_response) - end - - let(:moderation_response_double) do - instance_double(HTTParty::Response, code: 200, success?: true, - response: response, parsed_response: moderation_response) - end - - around do |ex| - # Silence moderation unset deprecations - ActiveSupport::Deprecation.silence do - ex.run - end - end - - before do - allow(response_double).to receive(:server_error?).and_return(false) - allow(response_double).to receive(:too_many_requests?).and_return(false) - allow(moderation_response_double).to receive(:server_error?).and_return(false) - allow(moderation_response_double).to receive(:too_many_requests?).and_return(false) - allow_next_instance_of(::OpenAI::Client) do |open_ai_client| - allow(open_ai_client) - .to receive(:public_send) - .with(method, hash_including(expected_options)) - .and_return(response_double) - - allow(open_ai_client) - .to receive(:public_send) - .with(:moderations, anything) - .and_return(moderation_response_double) - end - - stub_application_setting(openai_api_key: access_token) - end - - shared_examples 'forwarding the request correctly' do - context 'when feature flag and access token is set' do - it { is_expected.to eq(response_double) } - end - - context 'when using options' do - let(:expected_options) { { parameters: hash_including({ temperature: 0.1 }) } } - let(:options) { { temperature: 0.1 } } - - it { is_expected.to eq(response_double) } - end - - context 'when the feature flag is disabled' do - before do - stub_feature_flags(ai_global_switch: false) - end - - it { is_expected.to be_nil } - end - - context 'when the access key is not present' do - let(:access_token) { nil } - - it { is_expected.to be_nil } - end - end - - shared_examples 'cost tracking' do - it 'tracks prompt and completion tokens cost' do - ::Gitlab::ApplicationContext.push(feature_category: 'not_owned') - - counter = instance_double(Prometheus::Client::Counter, increment: true) - - allow(Gitlab::Metrics::Sli::ErrorRate[:llm_client_request]).to receive(:increment) - allow(Gitlab::Metrics).to receive(:counter).and_return(counter) - - expect(counter) - .to receive(:increment) - .with( - { - vendor: 'open_ai', - item: "#{method}/prompt", - unit: 'tokens', - feature_category: 'not_owned' - }, - example_response['usage']['prompt_tokens'] - ) - - expect(counter) - .to receive(:increment) - .with( - { - vendor: 'open_ai', - item: "#{method}/completion", - unit: 'tokens', - feature_category: 'not_owned' - }, - example_response['usage']['completion_tokens'] - ) - - subject - end - end - - shared_examples 'event tracking' do - it 'tracks a snowplow event' do - subject - - expect_snowplow_event( - category: described_class.to_s, - action: 'tokens_per_user_request_prompt', - property: 'uuid', - label: 'chat', - user: user, - value: example_response['usage']['prompt_tokens'] - ) - - expect_snowplow_event( - category: described_class.to_s, - action: 'tokens_per_user_request_response', - property: 'uuid', - label: 'chat', - user: user, - value: example_response['usage']['completion_tokens'] - ) - end - end - - shared_examples 'input moderation' do - context 'when moderation flag is nil' do - let(:options) { { moderated: nil } } - - it 'produces a deprecation warning' do - expect_next_instance_of(::OpenAI::Client) do |open_ai_client| - expect(open_ai_client) - .to receive(:public_send) - .with(method, anything) - .and_return(response_double) - - allow(open_ai_client) - .to receive(:public_send) - .with(:moderations, anything) - .and_return(moderation_response_double) - end - - expect(ActiveSupport::Deprecation).to receive(:warn).with(/`moderated` argument is not set/, anything) - - subject - end - end - - context 'when moderation flag is set' do - let(:options) { { moderated: :input } } - - context 'when response is not flagged' do - it 'returns the response from original endpoint' do - expect_next_instance_of(::OpenAI::Client) do |open_ai_client| - expect(open_ai_client) - .to receive(:public_send) - .with(method, anything) - .and_return(response_double) - - expect(open_ai_client) - .to receive(:public_send) - .with(:moderations, anything) - .once - .and_return(moderation_response_double) - end - - subject - end - end - - context 'when response is flagged' do - let(:moderation_response) do - { 'results' => [{ 'flagged' => true }, { 'flagged' => false }] } - end - - it 'raises TextModerationError' do - expect { subject } - .to raise_error(described_class::InputModerationError, "Provided input violates OpenAI's Content Policy") - end - end - end - - context 'when moderation flag is false' do - let(:options) { { moderated: false } } - - it 'does not call the moderation endpoint' do - expect_next_instance_of(::OpenAI::Client) do |open_ai_client| - expect(open_ai_client) - .to receive(:public_send) - .with(method, anything) - .and_return(response_double) - - expect(open_ai_client).not_to receive(:moderations) - end - - expect(subject).to eq(response_double) - end - end - end - - shared_examples 'output moderation' do - before do - allow_next_instance_of(::OpenAI::Client) do |open_ai_client| - allow(open_ai_client) - .to receive(:public_send) - .with(method, anything) - .and_return(response_double) - - allow(open_ai_client) - .to receive(:public_send) - .with(:moderations, anything) - .and_return(moderation_response_double) - end - end - - context 'when moderation flag is nil' do - let(:options) { { moderated: nil } } - - it 'produces a deprecation warning' do - expect(ActiveSupport::Deprecation).to receive(:warn).with(/`moderated` argument is not set/, anything) - - subject - end - end - - context 'when output moderation flag is true' do - let(:options) { { moderated: :output } } - - context 'when response is not flagged' do - it 'returns the response from original endpoint' do - expect_next_instance_of(::OpenAI::Client) do |open_ai_client| - expect(open_ai_client) - .to receive(:public_send) - .with(method, anything) - .and_return(response_double) - - expect(open_ai_client) - .to receive(:public_send) - .with(:moderations, anything) - .once - .and_return(moderation_response_double) - end - - subject - end - end - - context 'when response is flagged' do - let(:moderation_response) do - { 'results' => [{ 'flagged' => true }, { 'flagged' => false }] } - end - - it 'raises TextModerationError' do - expect { subject } - .to raise_error(described_class::OutputModerationError, "Provided output violates OpenAI's Content Policy") - end - end - end - - context 'when moderation flag is false' do - let(:options) { { moderated: false } } - - it 'does not call the moderation endpoint' do - expect_next_instance_of(::OpenAI::Client) do |open_ai_client| - expect(open_ai_client) - .to receive(:public_send) - .with(method, anything) - .and_return(response_double) - - expect(open_ai_client).not_to receive(:moderations) - end - - expect(subject).to eq(response_double) - end - end - end - - describe '#chat' do - subject(:chat) do - described_class.new(user, tracking_context: tracking_context).chat(content: 'anything', **options) - end - - let(:method) { :chat } - - it_behaves_like 'forwarding the request correctly' - it_behaves_like 'tracks events for AI requests', 1, 2 - include_examples 'cost tracking' - include_examples 'event tracking' - include_examples 'input moderation' - include_examples 'output moderation' - - context 'when measuring request success' do - let(:client) { :open_ai } - let(:options) { { moderated: false } } - - it_behaves_like 'measured Llm request' - - context 'when request raises an exception' do - before do - allow_next_instance_of(OpenAI::Client) do |open_client| - allow(open_client).to receive(:chat).and_raise(StandardError) - end - end - - it_behaves_like 'measured Llm request with error', StandardError - end - - context 'when request is retried' do - let(:http_status) { 429 } - - before do - stub_const("Gitlab::Llm::Concerns::ExponentialBackoff::INITIAL_DELAY", 0.0) - allow(response_double).to receive(:too_many_requests?).and_return(true) - end - - it_behaves_like 'measured Llm request with error', Gitlab::Llm::Concerns::ExponentialBackoff::RateLimitError - end - end - end - - describe '#messages_chat' do - stub_feature_flags(ai_global_switch: true) - - subject(:messages_chat) do - described_class.new(user, tracking_context: tracking_context).messages_chat( - messages: messages, - **options - ) - end - - let(:messages) do - [ - { role: ::Gitlab::Llm::OpenAi::Options::SYSTEM_ROLE, content: 'you are a language model' }, - { role: ::Gitlab::Llm::OpenAi::Options::DEFAULT_ROLE, content: 'what?' }, - { 'role' => ::Gitlab::Llm::OpenAi::Options::DEFAULT_ROLE, 'content' => 'are string keys ok?' } - ] - end - - let(:method) { :chat } - let(:options) { { temperature: 0.1 } } - let(:expected_options) { { parameters: hash_including({ messages: messages, temperature: 0.1 }) } } - - it_behaves_like 'forwarding the request correctly' - it_behaves_like 'tracks events for AI requests', 1, 2 - include_examples 'cost tracking' - include_examples 'event tracking' - include_examples 'input moderation' - include_examples 'output moderation' - - context 'without the correct role' do - let(:messages) do - [ - { role: 'Charles Darwin', content: 'you are a language model' }, - { role: 'Teacher', content: 'what?' } - ] - end - - it 'raises an error' do - expect { messages_chat }.to raise_error ArgumentError - end - end - end - - describe '#completions' do - subject(:completions) do - described_class.new(user, tracking_context: tracking_context).completions(prompt: 'anything', **options) - end - - let(:method) { :completions } - - it_behaves_like 'forwarding the request correctly' - it_behaves_like 'tracks events for AI requests', 1, 2 - include_examples 'cost tracking' - include_examples 'event tracking' - include_examples 'input moderation' - include_examples 'output moderation' - end - - describe '#edits' do - subject(:edits) do - described_class.new(user, tracking_context: tracking_context).edits(input: 'foo', instruction: 'bar', **options) - end - - let(:method) { :edits } - - it_behaves_like 'forwarding the request correctly' - it_behaves_like 'tracks events for AI requests', 1, 2 - include_examples 'cost tracking' - include_examples 'event tracking' - include_examples 'input moderation' - include_examples 'output moderation' - end - - describe '#embeddings' do - subject(:embeddings) do - described_class.new(user, tracking_context: tracking_context).embeddings(input: 'foo', **options) - end - - let(:method) { :embeddings } - let(:example_response) do - { - 'model' => 'gpt-3.5-turbo', - "data" => [ - { - "embedding" => [ - -0.006929283495992422, - -0.005336422007530928 - ] - } - ], - 'usage' => { - 'prompt_tokens' => 1, - 'completion_tokens' => 2, - 'total_tokens' => 3 - } - } - end - - it_behaves_like 'forwarding the request correctly' - it_behaves_like 'tracks events for AI requests', 1, 2 - include_examples 'cost tracking' - include_examples 'event tracking' - include_examples 'input moderation' - end - - describe '#moderations' do - subject(:moderations) do - described_class.new(user, tracking_context: tracking_context).moderations(input: 'foo', **options) - end - - let(:method) { :moderations } - let(:example_response) do - { - 'model' => 'model', - 'results' => [ - { - "categories" => { - "category" => false - }, - "category_scores" => { - "category" => 0.22714105248451233 - }, - "flagged" => false - } - ] - } - end - - before do - allow_next_instance_of(::OpenAI::Client) do |open_ai_client| - allow(open_ai_client) - .to receive(:public_send) - .with(method, hash_including(expected_options)) - .and_return(response_double) - end - end - - it_behaves_like 'forwarding the request correctly' - end -end diff --git a/ee/spec/lib/gitlab/metrics/llm_spec.rb b/ee/spec/lib/gitlab/metrics/llm_spec.rb index a417570dab94bf6e198ab94f202df80f56c12c6b..34e31a1edfef0d95bf1a06a293a50146e0cfa456 100644 --- a/ee/spec/lib/gitlab/metrics/llm_spec.rb +++ b/ee/spec/lib/gitlab/metrics/llm_spec.rb @@ -27,7 +27,6 @@ expect(described_class.client_label(Gitlab::Llm::AiGateway::Client)).to eq(:ai_gateway) expect(described_class.client_label(Gitlab::Llm::VertexAi::Client)).to eq(:vertex_ai) expect(described_class.client_label(Gitlab::Llm::Anthropic::Client)).to eq(:anthropic) - expect(described_class.client_label(Gitlab::Llm::OpenAi::Client)).to eq(:open_ai) end it 'returns :unknwon for other classes' do