Skip to content
代码片段 群组 项目
未验证 提交 c8bf16f1 编辑于 作者: Mark Chao's avatar Mark Chao 提交者: GitLab
浏览文件

Add label table

Remove <?xml> tag
上级 0bad7ee0
No related branches found
No related tags found
无相关合并请求
......@@ -5,20 +5,30 @@ module Llm
module Anthropic
module Completions
class CategorizeQuestion < Gitlab::Llm::Completions::Base
SCHEMA_URL = 'iglu:com.gitlab/ai_question_category/jsonschema/1-0-0'
SCHEMA_URL = 'iglu:com.gitlab/ai_question_category/jsonschema/1-1-0'
# Reads a file from the `fixtures` directory located two levels above this
# file's directory and returns its contents with every newline removed.
#
# @param filename [String] name of the file inside the fixtures directory
# @return [String] file contents collapsed onto a single line
private_class_method def self.load_xml(filename)
  fixture_path = File.join(File.dirname(__FILE__), '..', '..', 'fixtures', filename)
  File.read(fixture_path).delete("\n")
end
LLM_MATCHING_CATEGORIES_XML = load_xml('categories.xml') # mandatory category definition
LLM_MATCHING_LABELS_XML = load_xml('labels.xml') # boolean attribute definitions
REQUIRED_KEYS = %w[detailed_category category].freeze
OPTIONAL_KEYS = [].freeze
OPTIONAL_KEYS = (
%w[language] +
Hash.from_xml(LLM_MATCHING_LABELS_XML)
.dig('root', 'label').pluck('type') # rubocop:disable CodeReuse/ActiveRecord -- Array#pluck
).freeze
PERMITTED_KEYS = REQUIRED_KEYS + OPTIONAL_KEYS
def execute
@ai_client = ::Gitlab::Llm::Anthropic::Client.new(user, tracking_context: tracking_context)
response = response_for(user, options)
@storage = ::Gitlab::Llm::ChatStorage.new(user)
@messages = @storage.messages_up_to(options[:message_id])
@logger = Gitlab::Llm::Logger.build
result = process_response(response, user)
if result
if track(user, attributes_from_llm)
ResponseModifiers::CategorizeQuestion.new(nil)
else
ResponseModifiers::CategorizeQuestion.new(error: 'Event not tracked')
......@@ -27,10 +37,7 @@ def execute
private
def response_for(user, options)
template = ai_prompt_class.new(user, options)
request(template)
end
attr_reader :messages
def request(template)
@ai_client.complete(
......@@ -38,28 +45,31 @@ def request(template)
)&.dig("completion").to_s.strip
end
def process_response(response, user)
json = Gitlab::Json.parse(response)
return false unless json
def attributes_from_llm
template = ai_prompt_class.new(messages, options)
data = Gitlab::Json.parse(request(template)) || {}
track(user, json)
# Turn array of matched label strings into boolean attributes
labels = data.delete('labels')
labels&.each { |label| data[label] = true }
data
rescue JSON::ParserError
error_message = "JSON has an invalid format."
@logger.error(message: "Error", class: self.class.to_s, error: error_message)
false
{}
end
def track(user, json)
unless contains_categories?(json)
def track(user, attributes)
return false if attributes.empty?
unless contains_categories?(attributes)
error_message = 'Response did not contain defined categories'
@logger.error(message: "Error", class: self.class.to_s, error: error_message)
return false
end
context = SnowplowTracker::SelfDescribingJson.new(SCHEMA_URL, json.slice(*PERMITTED_KEYS))
context = SnowplowTracker::SelfDescribingJson.new(SCHEMA_URL, attributes.slice(*PERMITTED_KEYS))
Gitlab::Tracking.event(
self.class.to_s,
......@@ -70,9 +80,9 @@ def track(user, json)
)
end
# Checks whether the given attributes include every mandatory key.
#
# @param hash [Hash] attributes keyed by string
# @return [Boolean] true only when every entry of REQUIRED_KEYS is a key of +hash+
def contains_categories?(hash)
  # `all?` yields a real boolean for this predicate; iterating with `each`
  # would hand back the key list itself on success.
  REQUIRED_KEYS.all? { |key| hash.key?(key) }
end
end
......
<?xml version="1.0" encoding="UTF-8"?><root><row>
<root><row>
<category>Documentation about GitLab</category>
<detailed_category>Question about GitLab Duo such as "what can you do?", "how can you help me?"</detailed_category>
......
<root>
<label><type>contains_rejection</type><desc>User indicates that the answer they received is incorrect, incomplete, or unsatisfactory</desc></label>
<label><type>contains_rejection_previous_answer_incorrect</type><desc>User indicates that the answer they received is incorrect</desc></label>
<label><type>contains_rejection_previous_answer_incomplete</type><desc>User indicates that the answer they received is incomplete</desc></label>
<label><type>contains_rejection_previous_answer_unsatisfactory</type><desc>User indicates that the answer they received is unsatisfactory</desc></label>
<label><type>is_follow_up_question</type><desc>The user asks a follow-up question to seek additional information or clarification in response to a previous answer</desc></label>
<label><type>contains_clarification</type><desc>The user seems to not have understood the previous answer and asks a follow-up question to clarify this</desc></label>
<label><type>contains_intellectual_property</type><desc>User has input intellectual property such as copyrighted material, trademarks or company secrets</desc></label>
<label><type>contains_credentials</type><desc>User has input credentials such as usernames, passwords, tokens, and other things that can be used to authenticate to digital systems</desc></label>
<label><type>contains_code</type><desc>Question contains code written in a programming language</desc></label>
<label><type>contains_personal_information</type><desc>User has input personally identifiable information (PII) such as names, email addresses, phone numbers, or credit card numbers</desc></label>
<label><type>compares_two_things</type><desc>User has requested chat to compare two things against each other</desc></label>
<label><type>compares_more_than_two_things</type><desc>User has requested chat to compare more than two things against each other</desc></label>
<label><type>requests_answer_in_certain_form</type><desc>User requests the chat to return the answer in a certain form, for example: short, long, bulleted list, containing a code snippet, formal, informal</desc></label>
<label><type>contains_request_to_format_the_answer</type><desc>User requests the chat to return the answer in a certain format, for example: XML, JSON, HTML, markdown</desc></label>
<label><type>is_related_to_gitlab</type><desc>User's question is related to GitLab, GitLab features, or how to use GitLab</desc></label>
<label><type>is_related_to_gitlab_data</type><desc>User's question is related to data in GitLab, such as the content of an issue, epic, code file, MR, or pipeline</desc></label>
<label><type>is_related_to_devsecops</type><desc>User's question relates to DevOps or DevSecOps, for example continuous integration and continuous deployment (CI/CD) pipelines, security testing tools, code scanning and review, threat modeling, security training for development teams, and automated compliance checks</desc></label>
<label><type>is_poorly_formulated</type><desc>The user has composed a question that is poorly formulated and/or ambiguous</desc></label>
</root>
......@@ -6,41 +6,48 @@ module Templates
class CategorizeQuestion
include Gitlab::Utils::StrongMemoize
def initialize(user, params = {})
@user = user
@params = params
end
PROMPT = ERB.new(<<~PROMPT)
\n\nHuman: You are helpful assistant, ready to give as accurate answer as possible in JSON format.
def to_prompt
prompt = <<~PROMPT
\n\nHuman: You are helpful assistant, ready to give as accurate answer as possible in JSON format.
Based on the information below (user input, <% if previous_answer %>previous answer, <% end %>categories, labels, language), classify user input's category, detailed_category, labels. There may be multiple labels. Don't provide clarification or explanation. Always return only a JSON hash, e.g.:
<example>{"category": "Write, improve, or explain code", "detailed_category": "What are the potential security risks in this code?", "labels": ["contains_credentials", "contains_rejection_previous_answer_incorrect"], "language": "en"}</example>
<example>{"category": "Documentation about GitLab", "detailed_category": "Documentation about GitLab", "labels": [], "language": "ja"}</example>
Given categories below (formatted with XML) return category and detailed_category of question below. Question is prefixed by "q".
<% if previous_answer %>
Previous answer:
<answer><%= previous_answer %></answer>
<% end %>
Categories XML:
%<categories>s
User input:
<input><%= question %></input>
q: %<question>s
Categories:
<%= ::Gitlab::Llm::Anthropic::Completions::CategorizeQuestion::LLM_MATCHING_CATEGORIES_XML %>
Return category and detailed category, always using JSON format. Example of said JSON:
"{"category": "Write, improve, or explain code", "detailed_category": "What are the potential security risks in this code?" }".
Labels:
<%= ::Gitlab::Llm::Anthropic::Completions::CategorizeQuestion::LLM_MATCHING_LABELS_XML %>
Always return only JSON structure.
Assistant:
PROMPT
Assistant:
JSON:
PROMPT
# Stores the chat messages and the template parameters for later rendering.
#
# @param messages [Array] chat messages; the prompt reads the second-to-last entry
# @param params [Hash] template parameters (the prompt reads +:question+)
def initialize(messages, params = {})
  @messages, @params = messages, params
end
format(prompt, question: params[:question], categories: categories_parsed_file)
# Renders PROMPT for the current question.
#
# The second-to-last message is used as the previous answer, but only when
# it comes from the assistant; otherwise no previous answer is passed.
#
# @return [String] the rendered prompt text
def to_prompt
  prior = messages[-2]
  previous_answer = prior.content if prior&.assistant?

  PROMPT.result_with_hash(question: params[:question], previous_answer: previous_answer)
end
private
attr_reader :user, :params
def categories_parsed_file
File.read(File.join(File.dirname(__FILE__), '..', 'fixtures', 'categories.xml'))
end
attr_reader :params, :messages
end
end
end
......
......@@ -6,27 +6,40 @@
describe '#execute' do
let(:user) { build(:user) }
let(:ai_client) { ::Gitlab::Llm::Anthropic::Client.new(nil) }
let(:response) { { 'completion' => answer.to_s } }
let(:response) { { 'completion' => llm_analysis_response.to_s } }
let(:llm_analysis_response) do
{
detailed_category: "Summarize issue",
category: 'Summarize something',
labels: %w[contains_code is_related_to_gitlab],
language: 'en',
extra: 'foo'
}.to_json
end
let(:prompt_message) do
build(:ai_message, :categorize_question, user: user, resource: user, request_id: 'uuid')
end
let(:options) { { question: 'What is the pipeline?' } }
let(:message_id) { '<message_id>' }
let(:options) { { question: 'What is the pipeline?', message_id: message_id } }
let(:template_class) { ::Gitlab::Llm::Templates::CategorizeQuestion }
let(:prompt) { '<prompt>' }
subject(:categorize_action) do
described_class.new(prompt_message, ::Gitlab::Llm::Templates::CategorizeQuestion, **options).execute
described_class.new(prompt_message, template_class, **options).execute
end
before do
allow_next_instance_of(template_class) do |template|
allow(template).to receive(:to_prompt).and_return(prompt)
end
allow_next_instance_of(::Gitlab::Llm::Anthropic::Client) do |ai_client|
allow(ai_client).to receive(:complete).and_return(response)
allow(ai_client).to receive(:complete).with(prompt: prompt).and_return(response)
end
end
context 'with valid response' do
let(:answer) { { detailed_category: "Summarize issue", category: 'Summarize something' }.to_json }
it 'tracks event' do
expect(categorize_action.errors).to be_empty
......@@ -37,14 +50,20 @@
user: user,
context: [{
schema: described_class::SCHEMA_URL,
data: { 'detailed_category' => "Summarize issue", 'category' => 'Summarize something' }
data: {
'detailed_category' => "Summarize issue",
'category' => 'Summarize something',
'contains_code' => true,
"is_related_to_gitlab" => true,
'language' => 'en'
}
}]
)
end
end
context 'with incomplete response' do
let(:answer) { { category: 'Summarize something' }.to_json }
let(:llm_analysis_response) { { category: 'Summarize something' }.to_json }
it 'does not track event' do
expect(categorize_action.errors).to include('Event not tracked')
......@@ -54,13 +73,13 @@
action: 'ai_question_category',
property: 'uuid',
user: user,
context: []
context: anything
)
end
end
context 'with invalid response' do
let(:answer) { "invalid" }
let(:llm_analysis_response) { "invalid" }
it 'does not track event' do
expect(categorize_action.errors).to include('Event not tracked')
......@@ -70,7 +89,7 @@
action: 'ai_question_category',
property: 'uuid',
user: user,
context: []
context: anything
)
end
end
......
......@@ -3,10 +3,10 @@
require 'spec_helper'
RSpec.describe Gitlab::Llm::Templates::CategorizeQuestion, feature_category: :duo_chat do
let(:user) { build(:user) }
let(:messages) { [] }
let(:question) { 'what is the issue' }
subject { described_class.new(user, { question: question }) }
subject { described_class.new(messages, { question: question }) }
describe '#to_prompt' do
it 'includes question' do
......@@ -15,10 +15,34 @@
expect(prompt).to include(question)
end
it 'includes xml part' do
it 'includes xmls' do
prompt = subject.to_prompt
expect(prompt).to include('<?xml version="1.0" encoding="UTF-8"?><root><row>')
expect(prompt).to include("Categories:\n<root>")
expect(prompt).to include("Labels:\n<root>")
end
context 'when previous answer is absent' do
it 'does not include previous answer' do
prompt = subject.to_prompt
expect(prompt).not_to include("Previous answer:\n<answer>")
end
end
context 'when previous answer is present' do
let(:messages) do
[
instance_double(Gitlab::Llm::ChatMessage, assistant?: true, content: '<LLM answer>'),
instance_double(Gitlab::Llm::ChatMessage, assistant?: false, content: '<user input>')
]
end
it 'includes previous answer' do
prompt = subject.to_prompt
expect(prompt).to include("Previous answer:\n<answer>")
end
end
end
end
0% 加载中 .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册