diff --git a/ee/lib/gitlab/llm/chain/tools/troubleshoot_job/executor.rb b/ee/lib/gitlab/llm/chain/tools/troubleshoot_job/executor.rb index 124b54e6da9369b3bc29d7c15bf24b7dd983ae4f..293b862c09a6f2709057c1f0fd9adff42e3e3587 100644 --- a/ee/lib/gitlab/llm/chain/tools/troubleshoot_job/executor.rb +++ b/ee/lib/gitlab/llm/chain/tools/troubleshoot_job/executor.rb @@ -10,6 +10,12 @@ class Executor < SlashCommandTool include Concerns::AiDependent include ::Gitlab::Utils::StrongMemoize + # We assume 1 character per token because we can't easily replicate the tokenizer logic, + # so we deliberately lower the characters-per-token ratio to compensate for that. + # For more context see: https://github.com/javirandor/anthropic-tokenizer and + # https://gitlab.com/gitlab-org/gitlab/-/issues/474146 + APPROX_MAX_INPUT_CHARS = 100_000 + NAME = 'TroubleshootJob' RESOURCE_NAME = 'Ci::Build' HUMAN_NAME = 'Troubleshoot Job' @@ -102,7 +108,7 @@ def disabled? def selected_text_options { - selected_text: job_log, + selected_text: truncated_job_log, language_info: language_info } end @@ -115,6 +121,20 @@ def job_log # ~25000 tokens job.trace.raw(last_lines: 1000) end + strong_memoize_attr :job_log + + def truncated_job_log + log_size_allowed = APPROX_MAX_INPUT_CHARS - prompt_size_without_log + job_log.last(log_size_allowed) + end + + def user_prompt + PROMPT_TEMPLATE[1][1] + end + + def prompt_size_without_log + user_prompt.size + end def job context.resource diff --git a/ee/spec/lib/gitlab/llm/chain/tools/troubleshoot_job/executor_spec.rb b/ee/spec/lib/gitlab/llm/chain/tools/troubleshoot_job/executor_spec.rb index bef622b4a4668f13deac1cc866e5c545738e742e..0ef0cac814d5d5509106243216d34ad2cd69357f 100644 --- a/ee/spec/lib/gitlab/llm/chain/tools/troubleshoot_job/executor_spec.rb +++ b/ee/spec/lib/gitlab/llm/chain/tools/troubleshoot_job/executor_spec.rb @@ -104,7 +104,27 @@ expect(prompt_class).to receive(:prompt).with(a_hash_including( input: input, language_info: "The repository code is written in C and C++.", - 
selected_text: build.trace.raw + selected_text: build.trace.raw # "BUILD TRACE" + )) + + tool.execute + end + end + + context 'when log is truncated' do + let(:log_size_allowed) { 3 } + + before do + stub_const("#{described_class}::APPROX_MAX_INPUT_CHARS", + described_class::PROMPT_TEMPLATE[1][1].size + log_size_allowed) + end + + it 'calls prompt with correct params' do + allow(tool).to receive(:provider_prompt_class).and_return(prompt_class) + expect(prompt_class).to receive(:prompt).with(a_hash_including( + input: input, + language_info: '', + selected_text: build.trace.raw.last(log_size_allowed) # ACE )) tool.execute