From 8fa0f88043240dfce880279f10c2f2d63cdf12dc Mon Sep 17 00:00:00 2001 From: Jan Provaznik <jprovaznik@gitlab.com> Date: Mon, 4 Dec 2023 14:23:13 +0000 Subject: [PATCH] Use Chat class for real chat requests Instead of calling the zero-shot agent directly when testing real requests, we now call the higher-level Chat class. This also allows us to test the newly added slash commands. --- doc/development/ai_features/duo_chat.md | 4 +- .../chat_real_requests_spec.rb} | 53 ++++++++----------- 2 files changed, 24 insertions(+), 33 deletions(-) rename ee/spec/lib/gitlab/llm/{chain/agents/zero_shot/executor_real_requests_spec.rb => completions/chat_real_requests_spec.rb} (92%) diff --git a/doc/development/ai_features/duo_chat.md b/doc/development/ai_features/duo_chat.md index dbccb6c98001..d7f88997fca8 100644 --- a/doc/development/ai_features/duo_chat.md +++ b/doc/development/ai_features/duo_chat.md @@ -101,7 +101,7 @@ export ANTHROPIC_API_KEY='<key>' # can use dev value of Gitlab::CurrentSettings export VERTEX_AI_CREDENTIALS='<vertex-ai-credentials>' # can set as dev value of Gitlab::CurrentSettings.vertex_ai_credentials export VERTEX_AI_PROJECT='<vertex-project-name>' # can use dev value of Gitlab::CurrentSettings.vertex_ai_project -REAL_AI_REQUEST=1 bundle exec rspec ee/spec/lib/gitlab/llm/chain/agents/zero_shot/executor_real_requests_spec.rb +REAL_AI_REQUEST=1 bundle exec rspec ee/spec/lib/gitlab/llm/completions/chat_real_requests_spec.rb ``` When you need to update the test questions that require documentation embeddings, @@ -112,7 +112,7 @@ make sure a new fixture is generated and committed together with the change. The following CI jobs for GitLab project run the rspecs tagged with `real_ai_request`: - `rspec-ee unit gitlab-duo-chat-zeroshot`: - the job runs `ee/spec/lib/gitlab/llm/chain/agents/zero_shot/executor_real_requests_spec.rb`. + the job runs `ee/spec/lib/gitlab/llm/completions/chat_real_requests_spec.rb`. The job is optionally triggered and allowed to fail. 
- `rspec-ee unit gitlab-duo-chat-qa`: diff --git a/ee/spec/lib/gitlab/llm/chain/agents/zero_shot/executor_real_requests_spec.rb b/ee/spec/lib/gitlab/llm/completions/chat_real_requests_spec.rb similarity index 92% rename from ee/spec/lib/gitlab/llm/chain/agents/zero_shot/executor_real_requests_spec.rb rename to ee/spec/lib/gitlab/llm/completions/chat_real_requests_spec.rb index b91dec795668..904839a87695 100644 --- a/ee/spec/lib/gitlab/llm/chain/agents/zero_shot/executor_real_requests_spec.rb +++ b/ee/spec/lib/gitlab/llm/completions/chat_real_requests_spec.rb @@ -2,7 +2,7 @@ require 'spec_helper' -RSpec.describe Gitlab::Llm::Chain::Agents::ZeroShot::Executor, :clean_gitlab_redis_chat, feature_category: :duo_chat do +RSpec.describe Gitlab::Llm::Completions::Chat, :clean_gitlab_redis_chat, feature_category: :duo_chat do include FakeBlobHelpers let_it_be(:user) { create(:user) } @@ -17,27 +17,19 @@ let(:resource) { nil } let(:extra_resource) { {} } let(:current_file) { nil } + let(:options) do + { extra_resource: extra_resource, current_file: current_file } + end let(:executor) do - ai_request = ::Gitlab::Llm::Chain::Requests::Anthropic.new(user) - context = ::Gitlab::Llm::Chain::GitlabContext.new( - current_user: user, - container: resource.try(:resource_parent)&.root_ancestor, - resource: resource, - ai_request: ai_request, - extra_resource: extra_resource, - current_file: current_file + message = ::Gitlab::Llm::ChatMessage.new( + 'user' => user, + 'content' => input, + 'role' => 'user', + 'context' => build(:ai_chat_message, user: user, content: input, resource: resource) ) - all_tools = Gitlab::Llm::Completions::Chat::TOOLS.dup - all_tools << ::Gitlab::Llm::Chain::Tools::CiEditorAssistant - - described_class.new( - user_input: input, - tools: all_tools, - context: context, - response_handler: response_service_double - ) + described_class.new(message, ::Gitlab::Llm::Completions::Chat, options) end before_all do @@ -56,7 +48,7 @@ answer = executor.execute 
expect(executor.context).to match_llm_tools(tools) - expect(answer.content).to match_llm_answer(answer_match) + expect(answer.response_body).to match_llm_answer(answer_match) end end @@ -128,14 +120,14 @@ context 'with issue reference' do let(:input) { format(input_template, issue_identifier: "the issue #{issue.to_reference(full: true)}") } - # rubocop: disable Layout/LineLength + # rubocop: disable Layout/LineLength -- keep table structure readable where(:input_template, :tools, :answer_match) do 'Please summarize %<issue_identifier>s' | %w[IssueIdentifier ResourceReader] | /reliability/ 'Summarize %<issue_identifier>s with bullet points' | %w[IssueIdentifier ResourceReader] | /reliability/ 'Can you list all the labels on %<issue_identifier>s?' | %w[IssueIdentifier ResourceReader] | /ai-enablement/ 'How old is %<issue_identifier>s?' | %w[IssueIdentifier ResourceReader] | /2 days/ 'How many days ago %<issue_identifier>s was created?' | %w[IssueIdentifier ResourceReader] | // - 'For which milestone is %<issue_identifier>s? And how long until then' | %w[IssueIdentifier ResourceReader] | lazy { /(#{milestone&.title}|due date.*#{due_date.strftime("%Y-%m-%d")})/ } + 'For which milestone is %<issue_identifier>s? And how long until then' | %w[IssueIdentifier ResourceReader] | lazy { /(#{milestone&.title}|due date.*#{due_date.strftime('%Y-%m-%d')})/ } 'Summarize the comments from %<issue_identifier>s into bullet points' | %w[IssueIdentifier ResourceReader] | /latency/ 'What should be the final solution for %<issue_identifier>s?' 
| %w[IssueIdentifier ResourceReader] | /solution/ end @@ -150,13 +142,13 @@ let(:resource) { issue } let(:input) { format(input_template, issue_identifier: "this issue") } - # rubocop: disable Layout/LineLength + # rubocop: disable Layout/LineLength -- keep table structure readable where(:input_template, :tools, :answer_match) do 'Please summarize %<issue_identifier>s' | %w[IssueIdentifier ResourceReader] | // 'Can you list all the labels on %<issue_identifier>s?' | %w[IssueIdentifier ResourceReader] | /ai-enablement/ 'How old is %<issue_identifier>s?' | %w[IssueIdentifier ResourceReader] | /2 days/ 'How many days ago %<issue_identifier>s was created?' | %w[IssueIdentifier ResourceReader] | // - 'For which milestone is %<issue_identifier>s? And how long until then' | %w[IssueIdentifier ResourceReader] | lazy { /(#{milestone&.title}|due date.*#{due_date.strftime("%Y-%m-%d")})/ } + 'For which milestone is %<issue_identifier>s? And how long until then' | %w[IssueIdentifier ResourceReader] | lazy { /(#{milestone&.title}|due date.*#{due_date.strftime('%Y-%m-%d')})/ } 'What should be the final solution for %<issue_identifier>s?' | %w[IssueIdentifier ResourceReader] | /solution/ end # rubocop: enable Layout/LineLength @@ -198,7 +190,7 @@ note: '+1, our company will also use this to manage our projects!') end - # rubocop: disable Layout/LineLength + # rubocop: disable Layout/LineLength -- keep table structure readable where(:input_template, :tools, :answer_match) do # evaluation of questions which involve processing of other resources is not reliable yet # because both IssueIdentifier and JsonReader tools assume we work with single resource: @@ -239,7 +231,7 @@ ] end - # rubocop: disable Layout/LineLength + # rubocop: disable Layout/LineLength -- keep table structure readable where(:input_template, :tools, :answer_match) do 'Can you sort this list by the number of users that have requested the use case and include the number for each use case? 
Can you include a verbatim for the two most requested use cases that reflect the general opinion of commenters for these two use cases?' | %w[] | /test|manage/ end @@ -255,7 +247,7 @@ end context 'when asking to explain code' do - # rubocop: disable Layout/LineLength + # rubocop: disable Layout/LineLength -- keep table structure readable where(:input_template, :tools, :answer_match) do # NOTE: `tools: []` is the correct expected value. # There is no tool for explaining a code and the LLM answers the question directly. @@ -313,13 +305,14 @@ context 'with epic reference' do let(:input) { format(input_template, epic_identifier: "the epic #{epic.to_reference(full: true)}") } - # rubocop: disable Layout/LineLength + # rubocop: disable Layout/LineLength -- keep table structure readable where(:input_template, :tools, :answer_match) do 'Please summarize %<epic_identifier>s' | %w[EpicIdentifier ResourceReader] | // 'Can you list all labels on %{epic_identifier} epic?' | %w[EpicIdentifier ResourceReader] | /ai-framework/ 'How old is %<epic_identifier>s?' | %w[EpicIdentifier ResourceReader] | /5 days/ 'How many days ago was %<epic_identifier>s epic created?' | %w[EpicIdentifier ResourceReader] | /5 days/ end + # rubocop: enable Layout/LineLength with_them do it_behaves_like 'successful prompt processing' @@ -329,12 +322,10 @@ context 'with `this epic`' do let(:resource) { epic } - # rubocop: disable Layout/LineLength where(:input_template, :tools, :answer_match) do 'Can you list all labels on this epic?' | %w[EpicIdentifier ResourceReader] | /ai-framework/ 'How many days ago was current epic created?' 
| %w[EpicIdentifier ResourceReader] | /5 days/ end - # rubocop: enable Layout/LineLength with_them do let(:input) { input_template } @@ -365,7 +356,7 @@ end end - # rubocop: disable Layout/LineLength + # rubocop: disable Layout/LineLength -- keep table structure readable where(:input_template, :tools, :answer_match) do # evaluation of questions which involve processing of other resources is not reliable yet # because both EpicIdentifier and JsonReader tools assume we work with single resource: @@ -411,7 +402,7 @@ it 'answers question about a name', :aggregate_failures do answer = executor.execute - expect(answer.content).to match_llm_answer('GitLab Duo Chat') + expect(answer.response_body).to match_llm_answer('GitLab Duo Chat') end end -- GitLab