diff --git a/doc/development/ai_features/duo_chat.md b/doc/development/ai_features/duo_chat.md index dbccb6c98001de4c87a99552ace7e899519fe453..d7f88997fca829c523ffe017efbf6d775b31ae6d 100644 --- a/doc/development/ai_features/duo_chat.md +++ b/doc/development/ai_features/duo_chat.md @@ -101,7 +101,7 @@ export ANTHROPIC_API_KEY='<key>' # can use dev value of Gitlab::CurrentSettings export VERTEX_AI_CREDENTIALS='<vertex-ai-credentials>' # can set as dev value of Gitlab::CurrentSettings.vertex_ai_credentials export VERTEX_AI_PROJECT='<vertex-project-name>' # can use dev value of Gitlab::CurrentSettings.vertex_ai_project -REAL_AI_REQUEST=1 bundle exec rspec ee/spec/lib/gitlab/llm/chain/agents/zero_shot/executor_real_requests_spec.rb +REAL_AI_REQUEST=1 bundle exec rspec ee/spec/lib/gitlab/llm/completions/chat_real_requests_spec.rb ``` When you need to update the test questions that require documentation embeddings, @@ -112,7 +112,7 @@ make sure a new fixture is generated and committed together with the change. The following CI jobs for GitLab project run the rspecs tagged with `real_ai_request`: - `rspec-ee unit gitlab-duo-chat-zeroshot`: - the job runs `ee/spec/lib/gitlab/llm/chain/agents/zero_shot/executor_real_requests_spec.rb`. + the job runs `ee/spec/lib/gitlab/llm/completions/chat_real_requests_spec.rb`. The job is optionally triggered and allowed to fail. - `rspec-ee unit gitlab-duo-chat-qa`: diff --git a/ee/spec/lib/gitlab/llm/chain/agents/zero_shot/executor_real_requests_spec.rb b/ee/spec/lib/gitlab/llm/completions/chat_real_requests_spec.rb similarity index 92% rename from ee/spec/lib/gitlab/llm/chain/agents/zero_shot/executor_real_requests_spec.rb rename to ee/spec/lib/gitlab/llm/completions/chat_real_requests_spec.rb index b91dec7956682d7ba9bd1d197860e7da30d4b505..904839a87695491d2d404224be4fcd3ff4a53458 100644 --- a/ee/spec/lib/gitlab/llm/chain/agents/zero_shot/executor_real_requests_spec.rb +++ b/ee/spec/lib/gitlab/llm/completions/chat_real_requests_spec.rb @@ -2,7 +2,7 @@ require 'spec_helper' -RSpec.describe Gitlab::Llm::Chain::Agents::ZeroShot::Executor, :clean_gitlab_redis_chat, feature_category: :duo_chat do +RSpec.describe Gitlab::Llm::Completions::Chat, :clean_gitlab_redis_chat, feature_category: :duo_chat do include FakeBlobHelpers let_it_be(:user) { create(:user) } @@ -17,27 +17,19 @@ let(:resource) { nil } let(:extra_resource) { {} } let(:current_file) { nil } + let(:options) do + { extra_resource: extra_resource, current_file: current_file } + end let(:executor) do - ai_request = ::Gitlab::Llm::Chain::Requests::Anthropic.new(user) - context = ::Gitlab::Llm::Chain::GitlabContext.new( - current_user: user, - container: resource.try(:resource_parent)&.root_ancestor, - resource: resource, - ai_request: ai_request, - extra_resource: extra_resource, - current_file: current_file + message = ::Gitlab::Llm::ChatMessage.new( + 'user' => user, + 'content' => input, + 'role' => 'user', + 'context' => build(:ai_chat_message, user: user, content: input, resource: resource) ) - all_tools = Gitlab::Llm::Completions::Chat::TOOLS.dup - all_tools << ::Gitlab::Llm::Chain::Tools::CiEditorAssistant - - described_class.new( - user_input: input, - tools: all_tools, - context: context, - response_handler: response_service_double - ) + described_class.new(message, ::Gitlab::Llm::Completions::Chat, options) end before_all do @@ -56,7 +48,7 @@ answer = executor.execute expect(executor.context).to match_llm_tools(tools) - expect(answer.content).to match_llm_answer(answer_match) + expect(answer.response_body).to match_llm_answer(answer_match) end end @@ -128,14 +120,14 @@ context 'with issue reference' do let(:input) { format(input_template, issue_identifier: "the issue #{issue.to_reference(full: true)}") } - # rubocop: disable Layout/LineLength + # rubocop: disable Layout/LineLength -- keep table structure readable where(:input_template, :tools, :answer_match) do 'Please summarize %<issue_identifier>s' | %w[IssueIdentifier ResourceReader] | /reliability/ 'Summarize %<issue_identifier>s with bullet points' | %w[IssueIdentifier ResourceReader] | /reliability/ 'Can you list all the labels on %<issue_identifier>s?' | %w[IssueIdentifier ResourceReader] | /ai-enablement/ 'How old is %<issue_identifier>s?' | %w[IssueIdentifier ResourceReader] | /2 days/ 'How many days ago %<issue_identifier>s was created?' | %w[IssueIdentifier ResourceReader] | // - 'For which milestone is %<issue_identifier>s? And how long until then' | %w[IssueIdentifier ResourceReader] | lazy { /(#{milestone&.title}|due date.*#{due_date.strftime("%Y-%m-%d")})/ } + 'For which milestone is %<issue_identifier>s? And how long until then' | %w[IssueIdentifier ResourceReader] | lazy { /(#{milestone&.title}|due date.*#{due_date.strftime('%Y-%m-%d')})/ } 'Summarize the comments from %<issue_identifier>s into bullet points' | %w[IssueIdentifier ResourceReader] | /latency/ 'What should be the final solution for %<issue_identifier>s?' | %w[IssueIdentifier ResourceReader] | /solution/ end @@ -150,13 +142,13 @@ let(:resource) { issue } let(:input) { format(input_template, issue_identifier: "this issue") } - # rubocop: disable Layout/LineLength + # rubocop: disable Layout/LineLength -- keep table structure readable where(:input_template, :tools, :answer_match) do 'Please summarize %<issue_identifier>s' | %w[IssueIdentifier ResourceReader] | // 'Can you list all the labels on %<issue_identifier>s?' | %w[IssueIdentifier ResourceReader] | /ai-enablement/ 'How old is %<issue_identifier>s?' | %w[IssueIdentifier ResourceReader] | /2 days/ 'How many days ago %<issue_identifier>s was created?' | %w[IssueIdentifier ResourceReader] | // - 'For which milestone is %<issue_identifier>s? And how long until then' | %w[IssueIdentifier ResourceReader] | lazy { /(#{milestone&.title}|due date.*#{due_date.strftime("%Y-%m-%d")})/ } + 'For which milestone is %<issue_identifier>s? And how long until then' | %w[IssueIdentifier ResourceReader] | lazy { /(#{milestone&.title}|due date.*#{due_date.strftime('%Y-%m-%d')})/ } 'What should be the final solution for %<issue_identifier>s?' | %w[IssueIdentifier ResourceReader] | /solution/ end # rubocop: enable Layout/LineLength @@ -198,7 +190,7 @@ note: '+1, our company will also use this to manage our projects!') end - # rubocop: disable Layout/LineLength + # rubocop: disable Layout/LineLength -- keep table structure readable where(:input_template, :tools, :answer_match) do # evaluation of questions which involve processing of other resources is not reliable yet # because both IssueIdentifier and JsonReader tools assume we work with single resource: @@ -239,7 +231,7 @@ ] end - # rubocop: disable Layout/LineLength + # rubocop: disable Layout/LineLength -- keep table structure readable where(:input_template, :tools, :answer_match) do 'Can you sort this list by the number of users that have requested the use case and include the number for each use case? Can you include a verbatim for the two most requested use cases that reflect the general opinion of commenters for these two use cases?' | %w[] | /test|manage/ end @@ -255,7 +247,7 @@ end context 'when asking to explain code' do - # rubocop: disable Layout/LineLength + # rubocop: disable Layout/LineLength -- keep table structure readable where(:input_template, :tools, :answer_match) do # NOTE: `tools: []` is the correct expected value. # There is no tool for explaining a code and the LLM answers the question directly. @@ -313,13 +305,14 @@ context 'with epic reference' do let(:input) { format(input_template, epic_identifier: "the epic #{epic.to_reference(full: true)}") } - # rubocop: disable Layout/LineLength + # rubocop: disable Layout/LineLength -- keep table structure readable where(:input_template, :tools, :answer_match) do 'Please summarize %<epic_identifier>s' | %w[EpicIdentifier ResourceReader] | // 'Can you list all labels on %{epic_identifier} epic?' | %w[EpicIdentifier ResourceReader] | /ai-framework/ 'How old is %<epic_identifier>s?' | %w[EpicIdentifier ResourceReader] | /5 days/ 'How many days ago was %<epic_identifier>s epic created?' | %w[EpicIdentifier ResourceReader] | /5 days/ end + # rubocop: enable Layout/LineLength with_them do it_behaves_like 'successful prompt processing' @@ -329,12 +322,10 @@ context 'with `this epic`' do let(:resource) { epic } - # rubocop: disable Layout/LineLength where(:input_template, :tools, :answer_match) do 'Can you list all labels on this epic?' | %w[EpicIdentifier ResourceReader] | /ai-framework/ 'How many days ago was current epic created?' | %w[EpicIdentifier ResourceReader] | /5 days/ end - # rubocop: enable Layout/LineLength with_them do let(:input) { input_template } @@ -365,7 +356,7 @@ end end - # rubocop: disable Layout/LineLength + # rubocop: disable Layout/LineLength -- keep table structure readable where(:input_template, :tools, :answer_match) do # evaluation of questions which involve processing of other resources is not reliable yet # because both EpicIdentifier and JsonReader tools assume we work with single resource: @@ -411,7 +402,7 @@ it 'answers question about a name', :aggregate_failures do answer = executor.execute - expect(answer.content).to match_llm_answer('GitLab Duo Chat') + expect(answer.response_body).to match_llm_answer('GitLab Duo Chat') end end