diff --git a/ee/app/assets/javascripts/ai/constants.js b/ee/app/assets/javascripts/ai/constants.js index d433183dc90e1801cd0877144c9428f9f8529545..f1bb4faab273f835712f9e7bf9640f9c864cb1d9 100644 --- a/ee/app/assets/javascripts/ai/constants.js +++ b/ee/app/assets/javascripts/ai/constants.js @@ -49,8 +49,4 @@ export const FEEDBACK_OPTIONS = [ }, ]; -const MODEL_MAX_TOKENS = 4096; // max for 'gpt-3.5-turbo' model as per https://platform.openai.com/docs/models/gpt-3-5 -export const MAX_RESPONSE_TOKENS = 300; -export const TOKENS_THRESHOLD = MODEL_MAX_TOKENS * 0.85; // We account for 15% fault in tokens calculation - export const EXPLAIN_CODE_TRACKING_EVENT_NAME = 'explain_code_blob_viewer'; diff --git a/ee/app/assets/javascripts/ai/utils.js b/ee/app/assets/javascripts/ai/utils.js index 3c5822bef8072839765168627851f5d48393ee4e..93a5d449ccc39f4052947609222e31364de8c4d9 100644 --- a/ee/app/assets/javascripts/ai/utils.js +++ b/ee/app/assets/javascripts/ai/utils.js @@ -1,14 +1,13 @@ import { findLastIndex } from 'lodash'; import { sprintf, __ } from '~/locale'; -import { - TOO_LONG_ERROR_TYPE, - i18n, - MAX_RESPONSE_TOKENS, - TOKENS_THRESHOLD, - GENIE_CHAT_MODEL_ROLES, -} from './constants'; +import { TOO_LONG_ERROR_TYPE, i18n, GENIE_CHAT_MODEL_ROLES } from './constants'; const areMessagesWithinLimit = (messages) => { + const MAX_RESPONSE_TOKENS = gon.ai?.chat?.max_response_token; + const TOKENS_THRESHOLD = gon.ai?.chat?.input_content_limit; + + if (!MAX_RESPONSE_TOKENS || !TOKENS_THRESHOLD) return true; // delegate dealing with the prompt size to BE + // we use `utils.computeTokens()` below to make it easier to test and mock calls to computeTokens() // eslint-disable-next-line no-use-before-define return utils.computeTokens(messages) + MAX_RESPONSE_TOKENS < TOKENS_THRESHOLD; diff --git a/ee/lib/ee/gitlab/gon_helper.rb b/ee/lib/ee/gitlab/gon_helper.rb index 3e16fef268cbc423efbe7842149ca871c38edb09..633a8eebd61b8547e81201c04a3463ed3ab58eab 100644 --- a/ee/lib/ee/gitlab/gon_helper.rb +++ b/ee/lib/ee/gitlab/gon_helper.rb @@ -11,6 +11,16 @@ def add_gon_variables gon.roadmap_epics_limit = 1000 + if current_user && defined?(Llm) + ai_chat = { + total_model_token: ::Llm::ExplainCodeService::TOTAL_MODEL_TOKEN_LIMIT, + max_response_token: ::Llm::ExplainCodeService::MAX_RESPONSE_TOKENS, + input_content_limit: ::Llm::ExplainCodeService::INPUT_CONTENT_LIMIT + } + + push_to_gon_attributes('ai', 'chat', ai_chat) + end + if ::Gitlab.com? gon.subscriptions_url = ::Gitlab::Routing.url_helpers.subscription_portal_url gon.subscriptions_legacy_sign_in_url = ::Gitlab::Routing.url_helpers.subscription_portal_legacy_sign_in_url diff --git a/ee/spec/frontend/ai/utils_spec.js b/ee/spec/frontend/ai/utils_spec.js index 80d0d0aa549e02542018efea62d29f4c4a7efff2..f6a0ef4235b2324139e3cacb46ff5fde6e1bb30d 100644 --- a/ee/spec/frontend/ai/utils_spec.js +++ b/ee/spec/frontend/ai/utils_spec.js @@ -1,27 +1,24 @@ import { utils } from 'ee/ai/utils'; -import { - i18n, - TOKENS_THRESHOLD, - MAX_RESPONSE_TOKENS, - GENIE_CHAT_MODEL_ROLES, -} from 'ee/ai/constants'; +import { i18n, GENIE_CHAT_MODEL_ROLES } from 'ee/ai/constants'; import { sprintf } from '~/locale'; -jest.mock('ee/ai/constants', () => { - // To simplify the things in testing, we override the constatants - // to make the MAX_RESPONSE_TOKENS and TOKENS_THRESHOLD smaller - // and easier to control - const originalConstants = jest.requireActual('ee/ai/constants'); - return { - ...originalConstants, - TOKENS_THRESHOLD: 40, // 36 * 4 = 144 characters. - MAX_RESPONSE_TOKENS: 4, // 4 * 4 = 16 characters. - }; -}); - +// To simplify the things in testing, we override the globals +// to make the MAX_RESPONSE_TOKENS and TOKENS_THRESHOLD smaller +// and easier to control +const TOKENS_THRESHOLD = 40; +const MAX_RESPONSE_TOKENS = 4; const MAX_PROMPT_TOKENS = TOKENS_THRESHOLD - MAX_RESPONSE_TOKENS; // 36 tokens describe('AI Utils', () => { + beforeEach(() => { + gon.ai = { + chat: { + max_response_token: MAX_RESPONSE_TOKENS, + input_content_limit: TOKENS_THRESHOLD, + }, + }; + }); + describe('generateExplainCodePrompt', () => { const filePath = 'fooPath'; const fileText = 'barText'; @@ -156,6 +153,23 @@ describe('AI Utils', () => { i18n.TOO_LONG_ERROR_MESSAGE, ); }); + + it.each` + max_response_token | input_content_limit + ${123} | ${undefined} + ${undefined} | ${123} + ${undefined} | ${undefined} + `( + 'drops no messages if token limitations are not available (delegates dealing with the prompt to BE)', + ({ max_response_token, input_content_limit }) => { + gon.ai = { + chat: { max_response_token, input_content_limit }, + }; + + result = utils.generateChatPrompt(userPrompt, basePrompts); + expect(result).toEqual([...basePrompts, lastUserMessage]); + }, + ); }); }); diff --git a/ee/spec/lib/ee/gitlab/gon_helper_spec.rb b/ee/spec/lib/ee/gitlab/gon_helper_spec.rb index c11c9ebb9cc4a884485d4b6af67819054388db1d..4a5b4e538640408fc80c153a6564f29ad8bb4385 100644 --- a/ee/spec/lib/ee/gitlab/gon_helper_spec.rb +++ b/ee/spec/lib/ee/gitlab/gon_helper_spec.rb @@ -18,6 +18,8 @@ def current_user before do allow(helper).to receive(:gon).and_return(gon) + allow(helper).to receive(:push_to_gon_attributes).and_return(nil) + allow(helper).to receive(:current_user).and_return(create(:user)) end it 'includes ee exclusive settings' do @@ -26,6 +28,17 @@ def current_user helper.add_gon_variables end + it 'adds AI gon attributes' do + ai_chat = { + total_model_token: ::Llm::ExplainCodeService::TOTAL_MODEL_TOKEN_LIMIT, + max_response_token: ::Llm::ExplainCodeService::MAX_RESPONSE_TOKENS, + input_content_limit: ::Llm::ExplainCodeService::INPUT_CONTENT_LIMIT + } + helper.add_gon_variables + + expect(helper).to have_received(:push_to_gon_attributes).with('ai', 'chat', ai_chat) + end + context 'when GitLab.com' do before do allow(Gitlab).to receive(:com?).and_return(true)