Skip to content
代码片段 群组 项目
提交 8fa0f880 编辑于 作者: Jan Provaznik's avatar Jan Provaznik 提交者: Pavel Shutsin
浏览文件

Use Chat class for real chat requests

Instead of calling the zero-shot agent directly when testing real requests,
we now call the higher-level Chat class. This also allows us to test the
newly added slash commands.
上级 747d9de4
No related branches found
No related tags found
无相关合并请求
......@@ -101,7 +101,7 @@ export ANTHROPIC_API_KEY='<key>' # can use dev value of Gitlab::CurrentSettings
export VERTEX_AI_CREDENTIALS='<vertex-ai-credentials>' # can set as dev value of Gitlab::CurrentSettings.vertex_ai_credentials
export VERTEX_AI_PROJECT='<vertex-project-name>' # can use dev value of Gitlab::CurrentSettings.vertex_ai_project
REAL_AI_REQUEST=1 bundle exec rspec ee/spec/lib/gitlab/llm/chain/agents/zero_shot/executor_real_requests_spec.rb
REAL_AI_REQUEST=1 bundle exec rspec ee/spec/lib/gitlab/llm/completions/chat_real_requests_spec.rb
```
When you need to update the test questions that require documentation embeddings,
......@@ -112,7 +112,7 @@ make sure a new fixture is generated and committed together with the change.
The following CI jobs for GitLab project run the rspecs tagged with `real_ai_request`:
- `rspec-ee unit gitlab-duo-chat-zeroshot`:
the job runs `ee/spec/lib/gitlab/llm/chain/agents/zero_shot/executor_real_requests_spec.rb`.
the job runs `ee/spec/lib/gitlab/llm/completions/chat_real_requests_spec.rb`.
The job is optionally triggered and allowed to fail.
- `rspec-ee unit gitlab-duo-chat-qa`:
......
......@@ -2,7 +2,7 @@
require 'spec_helper'
RSpec.describe Gitlab::Llm::Chain::Agents::ZeroShot::Executor, :clean_gitlab_redis_chat, feature_category: :duo_chat do
RSpec.describe Gitlab::Llm::Completions::Chat, :clean_gitlab_redis_chat, feature_category: :duo_chat do
include FakeBlobHelpers
let_it_be(:user) { create(:user) }
......@@ -17,27 +17,19 @@
let(:resource) { nil }
let(:extra_resource) { {} }
let(:current_file) { nil }
let(:options) do
{ extra_resource: extra_resource, current_file: current_file }
end
let(:executor) do
ai_request = ::Gitlab::Llm::Chain::Requests::Anthropic.new(user)
context = ::Gitlab::Llm::Chain::GitlabContext.new(
current_user: user,
container: resource.try(:resource_parent)&.root_ancestor,
resource: resource,
ai_request: ai_request,
extra_resource: extra_resource,
current_file: current_file
message = ::Gitlab::Llm::ChatMessage.new(
'user' => user,
'content' => input,
'role' => 'user',
'context' => build(:ai_chat_message, user: user, content: input, resource: resource)
)
all_tools = Gitlab::Llm::Completions::Chat::TOOLS.dup
all_tools << ::Gitlab::Llm::Chain::Tools::CiEditorAssistant
described_class.new(
user_input: input,
tools: all_tools,
context: context,
response_handler: response_service_double
)
described_class.new(message, ::Gitlab::Llm::Completions::Chat, options)
end
before_all do
......@@ -56,7 +48,7 @@
answer = executor.execute
expect(executor.context).to match_llm_tools(tools)
expect(answer.content).to match_llm_answer(answer_match)
expect(answer.response_body).to match_llm_answer(answer_match)
end
end
......@@ -128,14 +120,14 @@
context 'with issue reference' do
let(:input) { format(input_template, issue_identifier: "the issue #{issue.to_reference(full: true)}") }
# rubocop: disable Layout/LineLength
# rubocop: disable Layout/LineLength -- keep table structure readable
where(:input_template, :tools, :answer_match) do
'Please summarize %<issue_identifier>s' | %w[IssueIdentifier ResourceReader] | /reliability/
'Summarize %<issue_identifier>s with bullet points' | %w[IssueIdentifier ResourceReader] | /reliability/
'Can you list all the labels on %<issue_identifier>s?' | %w[IssueIdentifier ResourceReader] | /ai-enablement/
'How old is %<issue_identifier>s?' | %w[IssueIdentifier ResourceReader] | /2 days/
'How many days ago %<issue_identifier>s was created?' | %w[IssueIdentifier ResourceReader] | //
'For which milestone is %<issue_identifier>s? And how long until then' | %w[IssueIdentifier ResourceReader] | lazy { /(#{milestone&.title}|due date.*#{due_date.strftime("%Y-%m-%d")})/ }
'For which milestone is %<issue_identifier>s? And how long until then' | %w[IssueIdentifier ResourceReader] | lazy { /(#{milestone&.title}|due date.*#{due_date.strftime('%Y-%m-%d')})/ }
'Summarize the comments from %<issue_identifier>s into bullet points' | %w[IssueIdentifier ResourceReader] | /latency/
'What should be the final solution for %<issue_identifier>s?' | %w[IssueIdentifier ResourceReader] | /solution/
end
......@@ -150,13 +142,13 @@
let(:resource) { issue }
let(:input) { format(input_template, issue_identifier: "this issue") }
# rubocop: disable Layout/LineLength
# rubocop: disable Layout/LineLength -- keep table structure readable
where(:input_template, :tools, :answer_match) do
'Please summarize %<issue_identifier>s' | %w[IssueIdentifier ResourceReader] | //
'Can you list all the labels on %<issue_identifier>s?' | %w[IssueIdentifier ResourceReader] | /ai-enablement/
'How old is %<issue_identifier>s?' | %w[IssueIdentifier ResourceReader] | /2 days/
'How many days ago %<issue_identifier>s was created?' | %w[IssueIdentifier ResourceReader] | //
'For which milestone is %<issue_identifier>s? And how long until then' | %w[IssueIdentifier ResourceReader] | lazy { /(#{milestone&.title}|due date.*#{due_date.strftime("%Y-%m-%d")})/ }
'For which milestone is %<issue_identifier>s? And how long until then' | %w[IssueIdentifier ResourceReader] | lazy { /(#{milestone&.title}|due date.*#{due_date.strftime('%Y-%m-%d')})/ }
'What should be the final solution for %<issue_identifier>s?' | %w[IssueIdentifier ResourceReader] | /solution/
end
# rubocop: enable Layout/LineLength
......@@ -198,7 +190,7 @@
note: '+1, our company will also use this to manage our projects!')
end
# rubocop: disable Layout/LineLength
# rubocop: disable Layout/LineLength -- keep table structure readable
where(:input_template, :tools, :answer_match) do
# evaluation of questions which involve processing of other resources is not reliable yet
# because both IssueIdentifier and JsonReader tools assume we work with single resource:
......@@ -239,7 +231,7 @@
]
end
# rubocop: disable Layout/LineLength
# rubocop: disable Layout/LineLength -- keep table structure readable
where(:input_template, :tools, :answer_match) do
'Can you sort this list by the number of users that have requested the use case and include the number for each use case? Can you include a verbatim for the two most requested use cases that reflect the general opinion of commenters for these two use cases?' | %w[] | /test|manage/
end
......@@ -255,7 +247,7 @@
end
context 'when asking to explain code' do
# rubocop: disable Layout/LineLength
# rubocop: disable Layout/LineLength -- keep table structure readable
where(:input_template, :tools, :answer_match) do
# NOTE: `tools: []` is the correct expected value.
# There is no tool for explaining a code and the LLM answers the question directly.
......@@ -313,13 +305,14 @@
context 'with epic reference' do
let(:input) { format(input_template, epic_identifier: "the epic #{epic.to_reference(full: true)}") }
# rubocop: disable Layout/LineLength
# rubocop: disable Layout/LineLength -- keep table structure readable
where(:input_template, :tools, :answer_match) do
'Please summarize %<epic_identifier>s' | %w[EpicIdentifier ResourceReader] | //
'Can you list all labels on %{epic_identifier} epic?' | %w[EpicIdentifier ResourceReader] | /ai-framework/
'How old is %<epic_identifier>s?' | %w[EpicIdentifier ResourceReader] | /5 days/
'How many days ago was %<epic_identifier>s epic created?' | %w[EpicIdentifier ResourceReader] | /5 days/
end
# rubocop: enable Layout/LineLength
with_them do
it_behaves_like 'successful prompt processing'
......@@ -329,12 +322,10 @@
context 'with `this epic`' do
let(:resource) { epic }
# rubocop: disable Layout/LineLength
where(:input_template, :tools, :answer_match) do
'Can you list all labels on this epic?' | %w[EpicIdentifier ResourceReader] | /ai-framework/
'How many days ago was current epic created?' | %w[EpicIdentifier ResourceReader] | /5 days/
end
# rubocop: enable Layout/LineLength
with_them do
let(:input) { input_template }
......@@ -365,7 +356,7 @@
end
end
# rubocop: disable Layout/LineLength
# rubocop: disable Layout/LineLength -- keep table structure readable
where(:input_template, :tools, :answer_match) do
# evaluation of questions which involve processing of other resources is not reliable yet
# because both EpicIdentifier and JsonReader tools assume we work with single resource:
......@@ -411,7 +402,7 @@
it 'answers question about a name', :aggregate_failures do
answer = executor.execute
expect(answer.content).to match_llm_answer('GitLab Duo Chat')
expect(answer.response_body).to match_llm_answer('GitLab Duo Chat')
end
end
......
0% 加载中 .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册