From 30b4154a46c13d72e06c61ffba35c29d73da1ea8 Mon Sep 17 00:00:00 2001 From: Tetiana Chupryna <tchupryna@gitlab.com> Date: Tue, 18 Feb 2025 16:27:24 +0000 Subject: [PATCH] Remove ZeroShot related classes --- .gitlab/ci/rails.gitlab-ci.yml | 43 -- .rubocop_todo/layout/class_structure.yml | 1 - .rubocop_todo/rspec/be_eq.yml | 1 - .rubocop_todo/rspec/named_subject.yml | 1 - doc/development/ai_features/duo_chat.md | 16 +- doc/development/ai_features/logged_events.md | 2 +- ee/app/models/ai/ai_resource/ci/build.rb | 6 - ee/app/models/ai/ai_resource/commit.rb | 8 - ee/app/models/ai/ai_resource/epic.rb | 6 - ee/app/models/ai/ai_resource/issue.rb | 6 - ee/app/models/ai/ai_resource/merge_request.rb | 6 - .../llm/chain/agents/zero_shot/executor.rb | 310 ------------- .../agents/zero_shot/prompts/anthropic.rb | 70 --- .../chain/agents/zero_shot/prompts/base.rb | 47 -- ee/lib/gitlab/llm/chain/gitlab_context.rb | 3 +- .../llm/chain/streamed_zero_shot_answer.rb | 43 -- .../llm/chain/tool_response_modifier.rb | 26 -- ee/lib/gitlab/llm/chain/utils/prompt.rb | 22 - .../chain/agents/zero_shot/executor_spec.rb | 437 ------------------ .../zero_shot/prompts/anthropic_spec.rb | 166 ------- .../gitlab/llm/chain/gitlab_context_spec.rb | 35 +- .../chain/streamed_zero_shot_answer_spec.rb | 50 -- .../llm/chain/tool_response_modifier_spec.rb | 19 - .../lib/gitlab/llm/chain/utils/prompt_spec.rb | 8 - .../models/ai/ai_resource/ci/build_spec.rb | 7 - ee/spec/models/ai/ai_resource/commit_spec.rb | 18 - ee/spec/models/ai/ai_resource/epic_spec.rb | 8 - ee/spec/models/ai/ai_resource/issue_spec.rb | 8 - .../ai/ai_resource/merge_request_spec.rb | 7 - scripts/duo_chat/reporter.rb | 308 ------------ spec/scripts/duo_chat/reporter_spec.rb | 270 ----------- spec/support/known_rspec_metadata_keys.yml | 3 - 32 files changed, 12 insertions(+), 1949 deletions(-) delete mode 100644 ee/lib/gitlab/llm/chain/agents/zero_shot/executor.rb delete mode 100644 ee/lib/gitlab/llm/chain/agents/zero_shot/prompts/anthropic.rb delete mode 100644 ee/lib/gitlab/llm/chain/agents/zero_shot/prompts/base.rb delete mode 100644 ee/lib/gitlab/llm/chain/streamed_zero_shot_answer.rb delete mode 100644 ee/lib/gitlab/llm/chain/tool_response_modifier.rb delete mode 100644 ee/spec/lib/gitlab/llm/chain/agents/zero_shot/executor_spec.rb delete mode 100644 ee/spec/lib/gitlab/llm/chain/agents/zero_shot/prompts/anthropic_spec.rb delete mode 100644 ee/spec/lib/gitlab/llm/chain/streamed_zero_shot_answer_spec.rb delete mode 100644 ee/spec/lib/gitlab/llm/chain/tool_response_modifier_spec.rb delete mode 100755 scripts/duo_chat/reporter.rb delete mode 100644 spec/scripts/duo_chat/reporter_spec.rb diff --git a/.gitlab/ci/rails.gitlab-ci.yml b/.gitlab/ci/rails.gitlab-ci.yml index 879ed459f821f..31e994041ae15 100644 --- a/.gitlab/ci/rails.gitlab-ci.yml +++ b/.gitlab/ci/rails.gitlab-ci.yml @@ -761,49 +761,6 @@ rspec-ee:predictive:trigger single-db-ci-connection: variables: PIPELINE_NAME: 'rspec-ee:predictive single-db-ci-connection' -.rspec-ee-base-gitlab-duo: - extends: - - .rspec-ee-base-pg14 - when: manual - variables: - REAL_AI_REQUEST: "true" - AI_GATEWAY_URL: http://ai-gateway:5052 - -rspec-ee unit gitlab-duo-chat-zeroshot pg14: - extends: - - .rspec-ee-base-gitlab-duo - - .rails:rules:ee-gitlab-duo-chat-optional - script: - - !reference [.base-script, script] - - rspec_parallelized_job "--tag zeroshot_executor" - -rspec-ee unit gitlab-duo-chat-qa-fast pg14: - extends: - - .rspec-ee-base-gitlab-duo - - .rails:rules:ee-gitlab-duo-chat-always - script: - - !reference 
[.base-script, script] - - rspec_parallelized_job "--tag fast_chat_qa_evaluation" - -rspec-ee unit gitlab-duo-chat-qa pg14: - variables: - QA_EVAL_REPORT_FILENAME: "qa_evaluation_report.md" - RSPEC_RETRY_RETRY_COUNT: 0 - extends: - - .rspec-ee-base-gitlab-duo - - .rails:rules:ee-gitlab-duo-chat-qa-full - script: - - !reference [.base-script, script] - - source ./scripts/utils.sh - - install_gitlab_gem - - bundle exec rspec -Ispec -rspec_helper --failure-exit-code 0 --color --tag chat_qa_evaluation -- ee/spec/lib/gitlab/llm/chain/agents/zero_shot/qa_evaluation_spec.rb - - ./scripts/duo_chat/reporter.rb - artifacts: - expire_in: 5d - paths: - - tmp/duo_chat/qa*.json - - "${QA_EVAL_REPORT_FILENAME}" - rspec-ee migration pg14: extends: - .rspec-ee-base-pg14 diff --git a/.rubocop_todo/layout/class_structure.yml b/.rubocop_todo/layout/class_structure.yml index 51ee34acfb4b5..aac977ca32427 100644 --- a/.rubocop_todo/layout/class_structure.yml +++ b/.rubocop_todo/layout/class_structure.yml @@ -243,7 +243,6 @@ Layout/ClassStructure: - 'ee/lib/gitlab/geo/git_ssh_proxy.rb' - 'ee/lib/gitlab/license_scanning/base_scanner.rb' - 'ee/lib/gitlab/license_scanning/package_licenses.rb' - - 'ee/lib/gitlab/llm/chain/agents/zero_shot/executor.rb' - 'ee/lib/gitlab/llm/chain/tools/tool.rb' - 'ee/lib/gitlab/llm/chat_storage.rb' - 'ee/lib/gitlab/llm/vertex_ai/configuration.rb' diff --git a/.rubocop_todo/rspec/be_eq.yml b/.rubocop_todo/rspec/be_eq.yml index 3b8b07f7e8225..1505189c2a2d9 100644 --- a/.rubocop_todo/rspec/be_eq.yml +++ b/.rubocop_todo/rspec/be_eq.yml @@ -156,7 +156,6 @@ RSpec/BeEq: - 'ee/spec/lib/gitlab/insights/project_insights_config_spec.rb' - 'ee/spec/lib/gitlab/licenses/submit_license_usage_data_banner_spec.rb' - 'ee/spec/lib/gitlab/llm/ai_gateway/docs_client_spec.rb' - - 'ee/spec/lib/gitlab/llm/chain/agents/zero_shot/executor_spec.rb' - 'ee/spec/lib/gitlab/llm/chain/answer_spec.rb' - 'ee/spec/lib/gitlab/llm/chain/requests/ai_gateway_spec.rb' - 'ee/spec/lib/gitlab/llm/chain/tools/gitlab_documentation/executor_spec.rb' diff --git a/.rubocop_todo/rspec/named_subject.yml b/.rubocop_todo/rspec/named_subject.yml index 4ebec3a982743..1afc893502c2b 100644 --- a/.rubocop_todo/rspec/named_subject.yml +++ b/.rubocop_todo/rspec/named_subject.yml @@ -395,7 +395,6 @@ RSpec/NamedSubject: - 'ee/spec/lib/gitlab/llm/ai_message_spec.rb' - 'ee/spec/lib/gitlab/llm/anthropic/response_modifiers/tanuki_bot_spec.rb' - 'ee/spec/lib/gitlab/llm/base_response_modifier_spec.rb' - - 'ee/spec/lib/gitlab/llm/chain/agents/zero_shot/prompts/anthropic_spec.rb' - 'ee/spec/lib/gitlab/llm/chain/parsers/output_parser_spec.rb' - 'ee/spec/lib/gitlab/llm/chain/response_modifier_spec.rb' - 'ee/spec/lib/gitlab/llm/chain/tools/tool_spec.rb' diff --git a/doc/development/ai_features/duo_chat.md b/doc/development/ai_features/duo_chat.md index a0fdda45bc32c..f7f6fd6d70b90 100644 --- a/doc/development/ai_features/duo_chat.md +++ b/doc/development/ai_features/duo_chat.md @@ -90,15 +90,11 @@ that Chat sends to assist troubleshooting. From the code perspective, Chat is implemented in the similar fashion as other AI features. Read more about GitLab [AI Abstraction layer](_index.md#feature-development-abstraction-layer). -The Chat feature uses a [zero-shot agent](https://gitlab.com/gitlab-org/gitlab/blob/master/ee/lib/gitlab/llm/chain/agents/zero_shot/executor.rb) -that includes a system prompt explaining how the large language model should -interpret the question and provide an answer. 
The system prompt defines
-available tools that can be used to gather information to answer the user's
-question.
-
-The zero-shot agent receives the user's question and decides which tools to use
-to gather information to answer it. It then makes a request to the large
-language model, which decides if it can answer directly or if it needs to use
+The Chat feature uses a [zero-shot agent](https://gitlab.com/gitlab-org/gitlab/blob/master/ee/lib/gitlab/duo/chat/react_executor.rb)
+that sends the user's question and relevant context to the [AI Gateway](https://gitlab.com/gitlab-org/modelops/applied-ml/code-suggestions/ai-assist),
+which constructs a prompt and sends the request to the large language model.
+
+The large language model decides if it can answer directly or if it needs to use
 one of the defined tools.
 
 The tools each have their own prompt that provides instructions to the large
@@ -576,7 +572,7 @@ flow of how we construct a Chat prompt:
 1. `Gitlab::Llm::Chain::Agents::SingleActionExecutor#execute` calls
    `execute_streamed_request`, which calls `request`, a method defined in the
    `AiDependent` concern
-   ([code](https://gitlab.com/gitlab-org/gitlab/-/blob/d539f64ce6c5bed72ab65294da3bcebdc43f68c6/ee/lib/gitlab/llm/chain/agents/zero_shot/executor.rb#L85))
+   ([code](https://gitlab.com/gitlab-org/gitlab/-/blob/7ac19f75bd0ba4db5cfe7030e56c3672e2ccdc88/ee/lib/gitlab/llm/chain/concerns/ai_dependent.rb#L14))
 1. The `SingleActionExecutor#prompt_options` method assembles all prompt
    parameters for the AI gateway request
    ([code](https://gitlab.com/gitlab-org/gitlab/-/blob/971d07aa37d9f300b108ed66304505f2d7022841/ee/lib/gitlab/llm/chain/agents/single_action_executor.rb#L120-120))
 1. `ai_request` is defined in `Llm::Completions::Chat` and evaluates to
diff --git a/doc/development/ai_features/logged_events.md b/doc/development/ai_features/logged_events.md
index 98176a84629e3..96c29f8d4c19c 100644
--- a/doc/development/ai_features/logged_events.md
+++ b/doc/development/ai_features/logged_events.md
@@ -295,7 +295,7 @@ In addition to standard logging in the GitLab Rails Monolith instance, specializ
 ### Picked tool
 
 - Description: information about tool picked by chat
-  - Class: `Gitlab::Llm::Chain::Agents::ZeroShot::Executor`
+  - Class: `Gitlab::Llm::Chain::Tools::Tool`
   - Ai_event_name: picked_tool
   - Level: info
   - Arguments:
diff --git a/ee/app/models/ai/ai_resource/ci/build.rb b/ee/app/models/ai/ai_resource/ci/build.rb
index 2f61cc223411e..52b973c34d866 100644
--- a/ee/app/models/ai/ai_resource/ci/build.rb
+++ b/ee/app/models/ai/ai_resource/ci/build.rb
@@ -20,12 +20,6 @@ def current_page_type
           "build"
         end
 
-        def current_page_short_description
-          <<~SENTENCE
-            The user is currently on a page that displays a ci build which the user might refer to, for example, as 'current', 'this' or 'that'.
-          SENTENCE
-        end
-
         def current_page_params
           {
             type: current_page_type
diff --git a/ee/app/models/ai/ai_resource/commit.rb b/ee/app/models/ai/ai_resource/commit.rb
index ce858935e6673..4038c72c24e14 100644
--- a/ee/app/models/ai/ai_resource/commit.rb
+++ b/ee/app/models/ai/ai_resource/commit.rb
@@ -19,14 +19,6 @@ def serialize_for_ai(content_limit:)
       def current_page_type
         "commit"
       end
-
-      def current_page_short_description
-        return '' unless Feature.enabled?(:ai_commit_reader_for_chat, current_user)
-
-        <<~SENTENCE
-          The user is currently on a page that displays a commit with a description, comments, etc., which the user might refer to, for example, as 'current', 'this' or 'that'. The title of the commit is '#{resource.title}'. 
- SENTENCE - end end end end diff --git a/ee/app/models/ai/ai_resource/epic.rb b/ee/app/models/ai/ai_resource/epic.rb index 874245a446ea1..4ad4a2b71de74 100644 --- a/ee/app/models/ai/ai_resource/epic.rb +++ b/ee/app/models/ai/ai_resource/epic.rb @@ -18,12 +18,6 @@ def serialize_for_ai(content_limit:) def current_page_type "epic" end - - def current_page_short_description - <<~SENTENCE - The user is currently on a page that displays an epic with a description, comments, etc., which the user might refer to, for example, as 'current', 'this' or 'that'. The title of the epic is '#{resource.title}'. - SENTENCE - end end end end diff --git a/ee/app/models/ai/ai_resource/issue.rb b/ee/app/models/ai/ai_resource/issue.rb index 8051d52878df0..47bbcfbefcba0 100644 --- a/ee/app/models/ai/ai_resource/issue.rb +++ b/ee/app/models/ai/ai_resource/issue.rb @@ -18,12 +18,6 @@ def serialize_for_ai(content_limit:) def current_page_type "issue" end - - def current_page_short_description - <<~SENTENCE - The user is currently on a page that displays an issue with a description, comments, etc., which the user might refer to, for example, as 'current', 'this' or 'that'. The title of the issue is '#{resource.title}'. - SENTENCE - end end end end diff --git a/ee/app/models/ai/ai_resource/merge_request.rb b/ee/app/models/ai/ai_resource/merge_request.rb index 8c100f5eb8cc6..5002df49569bc 100644 --- a/ee/app/models/ai/ai_resource/merge_request.rb +++ b/ee/app/models/ai/ai_resource/merge_request.rb @@ -18,12 +18,6 @@ def serialize_for_ai(content_limit:) def current_page_type "merge_request" end - - def current_page_short_description - <<~SENTENCE - The user is currently on a page that displays a merge request with a description, comments, etc., which the user might refer to, for example, as 'current', 'this' or 'that'. The title of the merge request is '#{resource.title}'. - SENTENCE - end end end end diff --git a/ee/lib/gitlab/llm/chain/agents/zero_shot/executor.rb b/ee/lib/gitlab/llm/chain/agents/zero_shot/executor.rb deleted file mode 100644 index 8aa79c533ff69..0000000000000 --- a/ee/lib/gitlab/llm/chain/agents/zero_shot/executor.rb +++ /dev/null @@ -1,310 +0,0 @@ -# frozen_string_literal: true - -# Deprecation: this executor will be removed in favor of ReactExecutor -# see https://gitlab.com/gitlab-org/gitlab/-/issues/469087 - -module Gitlab - module Llm - module Chain - module Agents - module ZeroShot - class Executor - include Gitlab::Utils::StrongMemoize - include ::Gitlab::Llm::Concerns::Logger - include Concerns::AiDependent - include Langsmith::RunHelpers - - attr_reader :tools, :user_input, :context, :response_handler - attr_accessor :iterations - - AGENT_NAME = 'GitLab Duo Chat' - MAX_ITERATIONS = 10 - RESPONSE_TYPE_TOOL = 'tool' - - PROVIDER_PROMPT_CLASSES = { - ai_gateway: ::Gitlab::Llm::Chain::Agents::ZeroShot::Prompts::Anthropic, - anthropic: ::Gitlab::Llm::Chain::Agents::ZeroShot::Prompts::Anthropic - }.freeze - - # @param [String] user_input - a question from a user - # @param [Array<Tool>] tools - an array of Tools defined in the tools module. 
- # @param [GitlabContext] context - Gitlab context containing useful context information - # @param [ResponseService] response_handler - Handles returning the response to the client - # @param [ResponseService] stream_response_handler - Handles streaming chunks to the client - def initialize(user_input:, tools:, context:, response_handler:, stream_response_handler: nil) - @user_input = user_input - @tools = tools - @context = context - @iterations = 0 - @response_handler = response_handler - @stream_response_handler = stream_response_handler - end - - def execute - MAX_ITERATIONS.times do - thought = execute_streamed_request - - answer = Answer.from_response(response_body: "Thought: #{thought}", tools: tools, context: context) - - return answer if answer.is_final? - - options[:agent_scratchpad] << "\nThought: #{answer.suggestions}" - options[:agent_scratchpad] << answer.content.to_s - - tool_class = answer.tool - - picked_tool_action(tool_class) - - tool = tool_class.new( - context: context, - options: { - input: user_input, - suggestions: options[:agent_scratchpad] - }, - stream_response_handler: stream_response_handler - ) - - tool_answer = tool.execute - - return tool_answer if tool_answer.is_final? - - options[:agent_scratchpad] << "Observation: #{tool_answer.content}\n" - end - - Answer.default_final_answer(context: context) - rescue Net::ReadTimeout => error - Gitlab::ErrorTracking.track_exception(error) - Answer.error_answer( - error: error, - context: context, - content: _("I'm sorry, I couldn't respond in time. Please try again."), - source: "chat_v1", - error_code: 'A1000' - ) - rescue Gitlab::Llm::AiGateway::Client::ConnectionError => error - Gitlab::ErrorTracking.track_exception(error) - Answer.error_answer( - error: error, - context: context, - source: "chat_v1", - error_code: "A1001" - ) - end - traceable :execute, name: 'Run ReAct' - - private - - def execute_streamed_request - request(&streamed_request_handler(StreamedZeroShotAnswer.new)) - end - - attr_reader :stream_response_handler - - # This method should not be memoized because the input variables change over time - def base_prompt - Utils::Prompt.no_role_text(PROMPT_TEMPLATE, options) - end - - def options - @options ||= { - tool_names: tools.map { |tool_class| tool_class::Executor::NAME }.join(', '), - tools_definitions: tools.map do |tool_class| - tool_class::Executor.full_definition - end.join("\n"), - user_input: user_input, - agent_scratchpad: +"", - conversation: conversation, - prompt_version: prompt_version, - zero_shot_prompt: zero_shot_prompt, - system_prompt: context.agent_version&.prompt, - current_resource: current_resource, - source_template: source_template, - current_code: current_code, - resources: available_resources_names, - unavailable_resources: unavailable_resources_names - } - end - - def picked_tool_action(tool_class) - log_info(message: "Picked tool", - event_name: 'picked_tool', - ai_component: 'duo_chat', - duo_chat_tool: tool_class.to_s) - - response_handler.execute( - response: Gitlab::Llm::Chain::ToolResponseModifier.new(tool_class), - options: { role: ::Gitlab::Llm::AiMessage::ROLE_SYSTEM, - type: RESPONSE_TYPE_TOOL } - ) - - # We need to stream the response for clients that already migrated to use `ai_action` and no longer - # use `resource_id` as an identifier. Once streaming is enabled and all clients migrated, we can - # remove the `response_handler` call above. 
- return unless stream_response_handler - - stream_response_handler.execute( - response: Gitlab::Llm::Chain::ToolResponseModifier.new(tool_class), - options: { - role: ::Gitlab::Llm::ChatMessage::ROLE_SYSTEM, - type: RESPONSE_TYPE_TOOL - } - ) - end - - def available_resources_names - tools.filter_map do |tool_class| - tool_class::Executor::RESOURCE_NAME.pluralize if tool_class::Executor::RESOURCE_NAME.present? - end.join(', ') - end - strong_memoize_attr :available_resources_names - - def unavailable_resources_names - %w[Pipelines Vulnerabilities].join(', ') - end - - def prompt_version - return CUSTOM_AGENT_PROMPT_TEMPLATE if context.agent_version - - PROMPT_TEMPLATE - end - - def zero_shot_prompt - ZERO_SHOT_PROMPT - end - - def last_conversation - ChatStorage.new(context.current_user, context.agent_version&.id).last_conversation - end - strong_memoize_attr :last_conversation - - def conversation - # include only messages with successful response and reorder - # messages so each question is followed by its answer - by_request = last_conversation - .reject { |message| message.errors.present? } - .group_by(&:request_id) - .select { |_uuid, messages| messages.size > 1 } - - by_request.values.sort_by { |messages| messages.first.timestamp }.flatten - end - - def current_code - file_context = current_file_context - return provider_prompt_class.current_selection_prompt(file_context) if file_context - - blob = @context.extra_resource[:blob] - return "" unless blob - - provider_prompt_class.current_blob_prompt(blob) - end - - def current_file_context - return unless context.current_file[:selected_text].present? - - context.current_file - end - - def prompt_options - options - end - - def current_resource - context.current_page_short_description - rescue ArgumentError - "" - end - - def source_template - <<~CONTEXT - If GitLab resource of issue or epic type is present and is directly relevant to the question, - include the following section at the end of your response: - 'Sources:' followed by the corresponding GitLab resource link named after the title of the resource. - Format the link using Markdown syntax ([title](link)) for it to be clickable. - CONTEXT - end - - ZERO_SHOT_PROMPT = <<~PROMPT.freeze - Answer the question as accurate as you can. - - You have access only to the following tools: - <tool_list> - %<tools_definitions>s - </tool_list> - Consider every tool before making a decision. - Ensure that your answer is accurate and contain only information directly supported by the information retrieved using provided tools. - - When you can answer the question directly you must use this response format: - Thought: you should always think about how to answer the question - Action: DirectAnswer - Final Answer: the final answer to the original input question if you have a direct answer to the user's question. - - You must always use the following format when using a tool: - Question: the input question you must answer - Thought: you should always think about what to do - Action: the action to take, should be one tool from this list: [%<tool_names>s] - Action Input: the input to the action needs to be provided for every action that uses a tool. - Observation: the result of the tool actions. But remember that you're still #{AGENT_NAME}. - - - ... (this Thought/Action/Action Input/Observation sequence can repeat N times) - - Thought: I know the final answer. - Final Answer: the final answer to the original input question. 
- - When concluding your response, provide the final answer as "Final Answer:". It should contain everything that user needs to see, including answer from "Observation" section. - %<current_code>s - - You have access to the following GitLab resources: %<resources>s. - You also have access to all information that can be helpful to someone working in software development of any kind. - At the moment, you do not have access to the following GitLab resources: %<unavailable_resources>s. - At the moment, you do not have the ability to search Issues or Epics based on a description or keywords. You can only read information about a specific issue/epic IF the user is on the specific issue/epic's page, or provides a URL or ID. - Do not use the IssueReader or EpicReader tool if you do not have these specified identifiers. - - %<source_template>s - - Ask user to leave feedback. - - %<current_resource>s - - Begin! - PROMPT - - PROMPT_TEMPLATE = [ - Utils::Prompt.as_system(ZERO_SHOT_PROMPT), - Utils::Prompt.as_user("Question: %<user_input>s"), - # We're explicitly adding "\n" before the `Assistant:` in order to avoid the Anthropic API error - # `prompt must end with "\n\nAssistant:" turn`. - # See https://gitlab.com/gitlab-org/gitlab/-/issues/435911 for more information. - Utils::Prompt.as_assistant("\nAssistant: %<agent_scratchpad>s"), - Utils::Prompt.as_assistant("Thought: ") - ].freeze - - CUSTOM_AGENT_PROMPT_TEMPLATE = [ - Utils::Prompt.as_system( - <<~PROMPT - You must always use the following format: - Question: the input question you must answer - Thought: you should always think about what to do - Action: the action to take, should be one tool from this list or a direct answer (then use DirectAnswer as action): [%<tool_names>s] - Action Input: the input to the action needs to be provided for every action that uses a tool - Observation: the result of the actions. If the Action is DirectAnswer never write an Observation, but remember that you're still #{AGENT_NAME}. - - ... (this Thought/Action/Action Input/Observation sequence can repeat N times) - - Thought: I know the final answer. - Final Answer: the final answer to the original input question. - - When concluding your response, provide the final answer as "Final Answer:" as soon as the answer is recognized. - - Begin! 
- PROMPT - ), - Utils::Prompt.as_user("Question: %<user_input>s"), - Utils::Prompt.as_assistant("Thought: ") - ].freeze - end - end - end - end - end -end diff --git a/ee/lib/gitlab/llm/chain/agents/zero_shot/prompts/anthropic.rb b/ee/lib/gitlab/llm/chain/agents/zero_shot/prompts/anthropic.rb deleted file mode 100644 index 6a233d9354c06..0000000000000 --- a/ee/lib/gitlab/llm/chain/agents/zero_shot/prompts/anthropic.rb +++ /dev/null @@ -1,70 +0,0 @@ -# frozen_string_literal: true - -module Gitlab - module Llm - module Chain - module Agents - module ZeroShot - module Prompts - class Anthropic < Base - include Concerns::AnthropicPrompt - extend Langsmith::RunHelpers - - def self.prompt(options) - history = truncated_conversation_list(options[:conversation]) - base = base_prompt(options) - - text = clean_messages(history + base) - - Requests::Anthropic.prompt(text) - end - traceable :prompt, name: 'Build prompt', run_type: 'prompt', class_method: true - - def self.truncated_conversation_list(conversation) - # We save a maximum of 50 chat history messages - # We save a max of 20k chars for each message prompt (~5k - # tokens) - # Response from Anthropic is max of 4096 tokens - # So the max tokens we would ever send 9k * 50 = 450k tokens. - # Max context window is 200k. - # For now, no truncating actually happening here but we should - # do that to make sure we stay under the limit. - # https://gitlab.com/gitlab-org/gitlab/-/issues/452608 - return [] if conversation.blank? - - conversation.map do |message, _| - { role: message.role.to_sym, content: message.content } - end - end - - def self.clean_messages(messages) - deduplicate_roles(messages.reject { |message| message[:content].nil? }) - end - - def self.deduplicate_roles(messages) - result = [] - previous_role = nil - - messages.each do |message| - current_role = message[:role] - current_content = message[:content] - - if current_role == previous_role - # If the current role is the same as the previous one, update the content - result.last[:content] = current_content - else - # If the role is different, add a new entry - result << { role: current_role, content: current_content } - previous_role = current_role - end - end - - result - end - end - end - end - end - end - end -end diff --git a/ee/lib/gitlab/llm/chain/agents/zero_shot/prompts/base.rb b/ee/lib/gitlab/llm/chain/agents/zero_shot/prompts/base.rb deleted file mode 100644 index e30f37bf8c845..0000000000000 --- a/ee/lib/gitlab/llm/chain/agents/zero_shot/prompts/base.rb +++ /dev/null @@ -1,47 +0,0 @@ -# frozen_string_literal: true - -module Gitlab - module Llm - module Chain - module Agents - module ZeroShot - module Prompts - class Base - def self.base_prompt(options) - system_prompt = options[:system_prompt] || Utils::Prompt.default_system_prompt - zero_shot_prompt = format(options[:zero_shot_prompt], options) - - Utils::Prompt.role_conversation([ - Utils::Prompt.as_system(system_prompt, zero_shot_prompt), - Utils::Prompt.as_user(options[:user_input]), - Utils::Prompt.as_assistant(options[:agent_scratchpad], "Thought:") - ]) - end - - def self.current_blob_prompt(blob) - <<~PROMPT - The current code file that user sees is #{blob.path} and has the following content: - <content> - #{blob.data} - </content> - - PROMPT - end - - def self.current_selection_prompt(current_file_context) - <<~PROMPT - User selected code below enclosed in <code></code> tags in file #{current_file_context[:file_name]} to work with: - - <code> - #{current_file_context[:selected_text]} - </code> - - PROMPT - end - 
end - end - end - end - end - end -end diff --git a/ee/lib/gitlab/llm/chain/gitlab_context.rb b/ee/lib/gitlab/llm/chain/gitlab_context.rb index 9d20aad78648a..07e7ab481b044 100644 --- a/ee/lib/gitlab/llm/chain/gitlab_context.rb +++ b/ee/lib/gitlab/llm/chain/gitlab_context.rb @@ -13,8 +13,7 @@ class GitlabContext attr_reader :project - delegate :current_page_type, :current_page_short_description, :current_page_params, - to: :authorized_resource, allow_nil: true + delegate :current_page_params, to: :authorized_resource, allow_nil: true # rubocop:disable Metrics/ParameterLists -- we probably need to rethink this initializer def initialize( diff --git a/ee/lib/gitlab/llm/chain/streamed_zero_shot_answer.rb b/ee/lib/gitlab/llm/chain/streamed_zero_shot_answer.rb deleted file mode 100644 index d794d31b05014..0000000000000 --- a/ee/lib/gitlab/llm/chain/streamed_zero_shot_answer.rb +++ /dev/null @@ -1,43 +0,0 @@ -# frozen_string_literal: true - -module Gitlab - module Llm - module Chain - class StreamedZeroShotAnswer < StreamedAnswer - def initialize - @final_answer_started = false - @full_message = '' - - super - end - - def next_chunk(content) - return if content.empty? - # If it already contains the final answer, we can return the content directly. - # There is then also no longer the need to build the full message. - return payload(content) if final_answer_started - - @full_message += content - - return unless final_answer_start.present? - - @final_answer_started = true - payload(final_answer_start.lstrip) - end - - private - - attr_accessor :full_message, :final_answer_started - - # The ChainOfThoughtParser would treat a response without any "Final Answer:" in the response - # as an answer. Because we do not have the full response when parsing the stream, we need to rely - # on the fact that everything after "Final Answer:" will be the final answer. - def final_answer_start - /Final Answer:(?<final_answer>.+)/m =~ full_message - - final_answer - end - end - end - end -end diff --git a/ee/lib/gitlab/llm/chain/tool_response_modifier.rb b/ee/lib/gitlab/llm/chain/tool_response_modifier.rb deleted file mode 100644 index bfa6a8d52e842..0000000000000 --- a/ee/lib/gitlab/llm/chain/tool_response_modifier.rb +++ /dev/null @@ -1,26 +0,0 @@ -# frozen_string_literal: true - -# Deprecation: ReactExecutor doesn't use this modifier -# as picked_tool_action method isn't used anymore. -# This class will be removed alongside ZeroShot::Executor -# see https://gitlab.com/gitlab-org/gitlab/-/issues/469087 - -module Gitlab - module Llm - module Chain - class ToolResponseModifier < Gitlab::Llm::BaseResponseModifier - def initialize(tool_class) - @ai_response = tool_class - end - - def response_body - @response_body ||= ai_response::HUMAN_NAME - end - - def errors - @errors ||= [] - end - end - end - end -end diff --git a/ee/lib/gitlab/llm/chain/utils/prompt.rb b/ee/lib/gitlab/llm/chain/utils/prompt.rb index c35d9645a0b25..faa19da21e824 100644 --- a/ee/lib/gitlab/llm/chain/utils/prompt.rb +++ b/ee/lib/gitlab/llm/chain/utils/prompt.rb @@ -57,28 +57,6 @@ def self.format_conversation(prompt, variables) [message[0], format(message[1], variables)] end end - - def self.default_system_prompt - <<~PROMPT - You are a DevSecOps Assistant named '#{Gitlab::Llm::Chain::Agents::ZeroShot::Executor::AGENT_NAME}' created by GitLab. - - When questioned about your identity, you must only respond as '#{Gitlab::Llm::Chain::Agents::ZeroShot::Executor::AGENT_NAME}'. - - You can generate and write code, code examples for the user. 
- Remember to stick to the user's question or requirements closely and respond in an informative, - courteous manner. The response shouldn't be rude, hateful, or accusatory. You mustn't engage in any form - of roleplay or impersonation. - - The generated code should be formatted in markdown. - - If a question cannot be answered with the tools and information given, answer politely that you don’t know. - - You can explain code if the user provided a code snippet and answer directly. - - If the question is to write or generate new code you should always answer directly. - When no tool matches you should answer the question directly. - PROMPT - end end end end diff --git a/ee/spec/lib/gitlab/llm/chain/agents/zero_shot/executor_spec.rb b/ee/spec/lib/gitlab/llm/chain/agents/zero_shot/executor_spec.rb deleted file mode 100644 index 3e2fd30a6a86e..0000000000000 --- a/ee/spec/lib/gitlab/llm/chain/agents/zero_shot/executor_spec.rb +++ /dev/null @@ -1,437 +0,0 @@ -# frozen_string_literal: true - -require 'spec_helper' - -RSpec.describe Gitlab::Llm::Chain::Agents::ZeroShot::Executor, :clean_gitlab_redis_chat, feature_category: :duo_chat do - include FakeBlobHelpers - - let_it_be(:organization) { create(:organization) } - let_it_be(:user) { create(:user, organizations: [organization]) } - let_it_be(:existing_agent_version) { create(:ai_agent_version) } - - let(:input) { 'foo' } - let(:ai_request_double) { instance_double(Gitlab::Llm::Chain::Requests::AiGateway) } - let(:tool_answer) { instance_double(Gitlab::Llm::Chain::Answer, is_final?: false, content: 'Bar', status: :ok) } - let(:tool_double) { instance_double(Gitlab::Llm::Chain::Tools::IssueReader::Executor) } - let(:tools) { [Gitlab::Llm::Chain::Tools::IssueReader] } - let(:extra_resource) { {} } - let(:response_double) { "I know the final answer\nFinal Answer: Hello World" } - let(:resource) { user } - let(:response_service_double) { instance_double(::Gitlab::Llm::ResponseService) } - let(:stream_response_service_double) { nil } - let(:current_file) { nil } - let(:agent_version) { nil } - - let(:context) do - Gitlab::Llm::Chain::GitlabContext.new( - current_user: user, container: nil, resource: resource, ai_request: ai_request_double, - extra_resource: extra_resource, current_file: current_file, agent_version: agent_version - ) - end - - before do - # This is normally derived from the AI Request class, but since we're using a double we have to mock that - allow(agent).to receive(:provider_prompt_class) - .and_return(::Gitlab::Llm::Chain::Agents::ZeroShot::Prompts::Anthropic) - end - - subject(:agent) do - described_class.new( - user_input: input, - tools: tools, - context: context, - response_handler: response_service_double, - stream_response_handler: stream_response_service_double - ) - end - - describe '#execute' do - before do - allow(context).to receive(:ai_request).and_return(ai_request_double) - allow(ai_request_double).to receive(:request).and_yield("Final Answer:").and_yield("Hello").and_yield(" World") - .and_return(response_double) - - allow(tool_double).to receive(:execute).and_return(tool_answer) - allow_next_instance_of(Gitlab::Llm::Chain::Answer) do |answer| - allow(answer).to receive(:tool).and_return(Gitlab::Llm::Chain::Tools::IssueReader::Executor) - end - allow(Gitlab::Llm::Chain::Tools::IssueReader::Executor) - .to receive(:new) - .with(context: context, options: anything, stream_response_handler: stream_response_service_double) - .and_return(tool_double) - end - - it 'executes associated tools and adds observations during 
the execution' do - answer = agent.execute - - expect(answer.is_final).to eq(true) - expect(answer.content).to include('Hello World') - end - - context 'without final answer' do - let(:logger) { instance_double(Gitlab::Llm::Logger) } - - before do - # just limiting the number of iterations here from 10 to 2 - stub_const("#{described_class.name}::MAX_ITERATIONS", 2) - allow(agent).to receive(:logger).at_least(:once).and_return(logger) - allow(agent).to receive(:request).and_return("Action: IssueReader\nAction Input: #3") - end - - it 'executes associated tools and adds observations during the execution' do - expect(logger).to receive(:info).with(hash_including(message: 'Picked tool')).twice - expect(response_service_double).to receive(:execute).at_least(:once) - - agent.execute - end - end - - context 'when max iterations reached' do - it 'returns' do - stub_const("#{described_class.name}::MAX_ITERATIONS", 2) - - allow(agent).to receive(:request).and_return("Action: IssueReader\nAction Input: #3") - expect(agent).to receive(:request).twice.times - expect(response_service_double).to receive(:execute).at_least(:once) - - answer = agent.execute - - expect(answer.is_final?).to eq(true) - expect(answer.content).to include(Gitlab::Llm::Chain::Answer.default_final_message) - end - end - - context 'when answer is final' do - let(:response_content_1) { "Thought: I know final answer\nFinal Answer: Foo" } - - it 'returns final answer' do - answer = agent.execute - - expect(answer.is_final?).to eq(true) - end - end - - context 'when tool answer if final' do - let(:tool_answer) { instance_double(Gitlab::Llm::Chain::Answer, is_final?: true) } - - it 'returns final answer' do - answer = agent.execute - - expect(answer.is_final?).to eq(true) - end - end - - context 'when stream_response_service is set' do - let(:stream_response_service_double) { instance_double(::Gitlab::Llm::ResponseService) } - - it 'streams the final answer' do - first_response_double = double - second_response_double = double - - allow(Gitlab::Llm::Chain::StreamedResponseModifier).to receive(:new).with("Hello", { chunk_id: 1 }) - .and_return(first_response_double) - - allow(Gitlab::Llm::Chain::StreamedResponseModifier).to receive(:new).with(" World", { chunk_id: 2 }) - .and_return(second_response_double) - - expect(stream_response_service_double).to receive(:execute).with( - response: first_response_double, - options: { chunk_id: 1 } - ) - expect(stream_response_service_double).to receive(:execute).with( - response: second_response_double, - options: { chunk_id: 2 } - ) - - agent.execute - end - - it 'streams the current tool', :aggregate_failures do - tool_double = double - - allow(Gitlab::Llm::Chain::ToolResponseModifier).to receive(:new) - .with(Gitlab::Llm::Chain::Tools::IssueReader::Executor) - .and_return(tool_double) - - expect(response_service_double).to receive(:execute).at_least(:once) - expect(stream_response_service_double).to receive(:execute).at_least(:once).with( - response: tool_double, - options: { role: ::Gitlab::Llm::ChatMessage::ROLE_SYSTEM, type: 'tool' } - ) - - allow(agent).to receive(:request).and_return("Action: IssueReader\nAction Input: #3") - - agent.execute - end - end - end - - describe '#prompt' do - let(:tools) do - [ - Gitlab::Llm::Chain::Tools::IssueReader, - Gitlab::Llm::Chain::Tools::EpicReader - ] - end - - let(:prompt_options) do - { - prompt_version: described_class::PROMPT_TEMPLATE, - resources: 'issues, epics', - system_prompt: nil - } - end - - let(:source_template) do - <<~CONTEXT - If GitLab 
resource of issue or epic type is present and is directly relevant to the question, - include the following section at the end of your response: - 'Sources:' followed by the corresponding GitLab resource link named after the title of the resource. - Format the link using Markdown syntax ([title](link)) for it to be clickable. - CONTEXT - end - - before do - allow(agent).to receive(:provider_prompt_class) - .and_return(Gitlab::Llm::Chain::Agents::ZeroShot::Prompts::Anthropic) - - create(:ai_chat_message, user: user, request_id: 'uuid1', role: 'user', content: 'question 1') - create(:ai_chat_message, user: user, request_id: 'uuid1', role: 'assistant', content: 'response 1') - # this should be ignored because response contains an error - create(:ai_chat_message, user: user, request_id: 'uuid2', role: 'user', content: 'question 2') - create(:ai_chat_message, - user: user, request_id: 'uuid2', role: 'assistant', content: 'response 2', errors: ['error']) - - # this should be ignored because it doesn't contain response - create(:ai_chat_message, user: user, request_id: 'uuid3', role: 'user', content: 'question 3') - - travel(2.minutes) do - create(:ai_chat_message, user: user, request_id: 'uuid4', role: 'user', content: 'question 4') - end - travel(2.minutes) do - create(:ai_chat_message, user: user, request_id: 'uuid5', role: 'user', content: 'question 5') - end - travel(3.minutes) do - create(:ai_chat_message, user: user, request_id: 'uuid4', role: 'assistant', content: 'response 4') - end - travel(4.minutes) do - create(:ai_chat_message, user: user, request_id: 'uuid5', role: 'assistant', content: 'response 5') - end - end - - it 'includes cleaned chat in prompt options with responses reordered to be paired with questions' do - expected_chat = [ - an_object_having_attributes(content: 'question 1'), - an_object_having_attributes(content: 'response 1'), - an_object_having_attributes(content: 'question 4'), - an_object_having_attributes(content: 'response 4'), - an_object_having_attributes(content: 'question 5'), - an_object_having_attributes(content: 'response 5') - ] - expect(Gitlab::Llm::Chain::Agents::ZeroShot::Prompts::Anthropic) - .to receive(:prompt).once.with(a_hash_including(conversation: expected_chat)) - - agent.prompt - end - - it 'includes source template' do - expect(system_prompt(agent)).to include(source_template) - end - - it 'includes prompt in the options' do - expect(Gitlab::Llm::Chain::Agents::ZeroShot::Prompts::Anthropic) - .to receive(:prompt).once.with(a_hash_including(prompt_options)) - - agent.prompt - end - - context 'when agent_version is passed' do - let(:agent_version) { existing_agent_version } - - before do - create(:ai_chat_message, user: user, agent_version_id: agent_version.id, request_id: 'uuid6', role: 'user', - content: 'agent version message 1') - create(:ai_chat_message, user: user, agent_version_id: agent_version.id, request_id: 'uuid6', - role: 'assistant', content: 'agent version message 2') - end - - it 'includes system prompt in prompt options' do - expect(Gitlab::Llm::Chain::Agents::ZeroShot::Prompts::Anthropic) - .to receive(:prompt).once.with(a_hash_including({ system_prompt: existing_agent_version.prompt, -prompt_version: described_class::CUSTOM_AGENT_PROMPT_TEMPLATE })) - - agent.prompt - end - - it 'includes only cleaned chat with messages for the user and agent' do - expected_chat = [ - an_object_having_attributes(content: 'agent version message 1'), - an_object_having_attributes(content: 'agent version message 2') - ] - 
expect(Gitlab::Llm::Chain::Agents::ZeroShot::Prompts::Anthropic) - .to receive(:prompt).once.with(a_hash_including(conversation: array_including(expected_chat))) - - agent.prompt - end - end - - context 'when duo chat context is created' do - shared_examples_for 'includes metadata' do - let(:metadata) do - <<~XML - <root> - <id>1</id> - <iid>1</iid> - <description> - <title>My title 1</title> - </description> - </root> - XML - end - - let(:prompt_resource) do - <<~CONTEXT - <resource> - #{metadata} - </resource> - CONTEXT - end - - let(:short_description) { 'short description' } - - it 'does not include the current resource metadata' do - expect(context).not_to receive(:resource_serialized) - expect(system_prompt(agent)).not_to include(prompt_resource) - end - - it 'includes the shortened resource description' do - expect(context).to receive(:current_page_short_description).and_return(short_description) - expect(system_prompt(agent)).to include(short_description) - end - end - - context 'when the resource is an issue' do - let(:resource) { create(:issue) } - - it_behaves_like 'includes metadata' - end - - context 'when the resource is an epic' do - let(:resource) { create(:epic) } - - it_behaves_like 'includes metadata' - end - end - - context 'with self discover part' do - let_it_be(:self_discoverability_prompt) { "You have access to the following GitLab resources: issues, epics" } - - it 'includes self-discoverability part in the prompt' do - expect(system_prompt(agent)).to include(self_discoverability_prompt) - end - end - - context 'when current_file is included in context' do - let(:selected_text) { 'code selection' } - let(:current_file) do - { - file_name: 'test.py', - selected_text: selected_text, - content_above_cursor: 'content_above_cursor', - content_below_cursor: 'content_below_cursor' - } - end - - it 'includes selected code in the prompt' do - expect(system_prompt(agent)).to include("code selection") - end - - context 'when selected_text is empty' do - let(:selected_text) { '' } - - it 'does not include selected code in the prompt' do - expect(system_prompt(agent)).not_to include("code selection") - end - end - end - - context 'when resource is a blob' do - let(:project) { build(:project) } - let(:blob) { fake_blob(path: 'foobar.rb', data: 'puts "hello world"') } - let(:extra_resource) { { blob: blob } } - - it 'includes the blob name and data in the prompt' do - expect(system_prompt(agent)).to include("foobar.rb") - expect(system_prompt(agent)).to include("puts \"hello world\"") - end - end - - context 'when times out error is raised' do - let(:error) { Net::ReadTimeout.new } - - before do - allow(Gitlab::ErrorTracking).to receive(:track_exception) - end - - context 'when streamed request times out' do - it 'returns an error' do - allow(ai_request_double).to receive(:request).and_raise(error) - - answer = agent.execute - - expect(answer.is_final).to eq(true) - expect(answer.content).to include("I'm sorry, I couldn't respond in time. 
Please try again.") - expect(answer.error_code).to include("A1000") - expect(Gitlab::ErrorTracking).to have_received(:track_exception).with(error) - end - end - - context 'when tool times out out' do - it 'returns an error' do - allow(ai_request_double).to receive(:request).and_return("Action: IssueReader\nAction Input: #3") - allow_next_instance_of(Gitlab::Llm::Chain::Answer) do |answer| - allow(answer).to receive(:tool).and_return(Gitlab::Llm::Chain::Tools::IssueReader::Executor) - end - - allow_next_instance_of(Gitlab::Llm::Chain::Tools::IssueReader::Executor) do |instance| - allow(instance).to receive(:execute).and_raise(error) - end - - allow(response_service_double).to receive(:execute) - - answer = agent.execute - - expect(answer.is_final).to eq(true) - expect(answer.content).to include("I'm sorry, I couldn't respond in time. Please try again.") - expect(answer.error_code).to include("A1000") - expect(Gitlab::ErrorTracking).to have_received(:track_exception).with(error) - end - end - end - - context 'when connection error is raised' do - let(:error) { ::Gitlab::Llm::AiGateway::Client::ConnectionError.new } - - before do - allow(Gitlab::ErrorTracking).to receive(:track_exception) - end - - context 'when streamed request times out' do - it 'returns an error' do - allow(ai_request_double).to receive(:request).and_raise(error) - - answer = agent.execute - - expect(answer.is_final).to eq(true) - expect(answer.content).to include("I'm sorry, I can't generate a response. Please try again.") - expect(answer.error_code).to include("A1001") - expect(Gitlab::ErrorTracking).to have_received(:track_exception).with(error) - end - end - end - end - - def system_prompt(agent) - agent.prompt[:prompt].reverse.find { |h| h[:role] == :system }[:content] - end -end diff --git a/ee/spec/lib/gitlab/llm/chain/agents/zero_shot/prompts/anthropic_spec.rb b/ee/spec/lib/gitlab/llm/chain/agents/zero_shot/prompts/anthropic_spec.rb deleted file mode 100644 index 841ea4fbcb210..0000000000000 --- a/ee/spec/lib/gitlab/llm/chain/agents/zero_shot/prompts/anthropic_spec.rb +++ /dev/null @@ -1,166 +0,0 @@ -# frozen_string_literal: true - -require 'spec_helper' - -RSpec.describe Gitlab::Llm::Chain::Agents::ZeroShot::Prompts::Anthropic, feature_category: :duo_chat do - include FakeBlobHelpers - - describe '.prompt' do - let(:prompt_version) { ::Gitlab::Llm::Chain::Agents::ZeroShot::Executor::PROMPT_TEMPLATE } - let(:zero_shot_prompt) { ::Gitlab::Llm::Chain::Agents::ZeroShot::Executor::ZERO_SHOT_PROMPT } - let(:user) { create(:user) } - let(:user_input) { 'foo?' } - let(:system_prompt) { nil } - let(:options) do - { - tools_definitions: "tool definitions", - tool_names: "tool names", - user_input: user_input, - agent_scratchpad: "some observation", - conversation: [ - build(:ai_message, request_id: 'uuid1', role: 'user', content: 'question 1'), - build(:ai_message, request_id: 'uuid1', role: 'assistant', content: 'response 1'), - build(:ai_message, request_id: 'uuid1', role: 'user', content: 'question 2'), - build(:ai_message, request_id: 'uuid1', role: 'assistant', content: 'response 2') - ], - prompt_version: prompt_version, - current_code: "", - current_resource: "", - resources: "", - current_user: user, - zero_shot_prompt: zero_shot_prompt, - system_prompt: system_prompt, - unavailable_resources: '', - source_template: "source template" - } - end - - let(:prompt_text) { "Answer the question as accurate as you can." 
} - - subject { described_class.prompt(options)[:prompt] } - - it 'returns the prompt format expected by the anthropic messages API' do - prompt = subject - prompts_by_role = prompt.group_by { |prompt| prompt[:role] } - user_prompts = prompts_by_role[:user] - assistant_prompts = prompts_by_role[:assistant] - - expect(prompt).to be_instance_of(Array) - - expect(prompts_by_role[:system][0][:content]).to include( - Gitlab::Llm::Chain::Utils::Prompt.default_system_prompt - ) - - expect(user_prompts[0][:content]).to eq("question 1") - expect(user_prompts[1][:content]).to eq("question 2") - expect(user_prompts[2][:content]).to eq(user_input) - - expect(prompts_by_role[:system][0][:content]).to include(prompt_text) - - expect(assistant_prompts[0][:content]).to eq("response 1") - expect(assistant_prompts[1][:content]).to eq("response 2") - end - - context 'when system prompt is provided' do - let(:system_prompt) { 'A custom prompt' } - let(:prompt_version) do - [ - Gitlab::Llm::Chain::Utils::Prompt.as_system('Some new instructions'), - Gitlab::Llm::Chain::Utils::Prompt.as_user("Question: %<user_input>s") - ] - end - - it 'returns the system prompt' do - prompt = subject - prompts_by_role = prompt.group_by { |prompt| prompt[:role] } - user_prompts = prompts_by_role[:user] - assistant_prompts = prompts_by_role[:assistant] - - expect(prompt).to be_instance_of(Array) - expect(prompts_by_role[:system][0][:content]).to include(system_prompt) - - expect(user_prompts[0][:content]).to eq("question 1") - expect(user_prompts[1][:content]).to eq("question 2") - - expect(user_prompts[2][:content]).to eq(user_input) - expect(prompts_by_role[:system][0][:content]).to include(prompt_text) - - expect(assistant_prompts[0][:content]).to eq("response 1") - expect(assistant_prompts[1][:content]).to eq("response 2") - end - end - - context 'when role is duplicated in history' do - let(:options) do - { - tools_definitions: "tool definitions", - tool_names: "tool names", - user_input: user_input, - agent_scratchpad: "some observation", - conversation: [ - build(:ai_message, request_id: 'uuid1', role: 'user', content: 'question 1'), - build(:ai_message, request_id: 'uuid1', role: 'assistant', content: 'response 1'), - build(:ai_message, request_id: 'uuid1', role: 'user', content: 'question 2'), - build(:ai_message, request_id: 'uuid1', role: 'assistant', content: 'duplicated response 1'), - build(:ai_message, request_id: 'uuid1', role: 'assistant', content: 'duplicated response 2') - ], - prompt_version: prompt_version, - current_code: "", - current_resource: "", - resources: "", - current_user: user, - zero_shot_prompt: zero_shot_prompt, - system_prompt: system_prompt, - unavailable_resources: '', - source_template: "source template" - } - end - - it 'returns last message with role' do - prompt = subject - - expect(prompt).to be_instance_of(Array) - expect(prompt).not_to include(hash_including(role: :assistant, content: 'duplicated response 1')) - expect(prompt).to include(hash_including(role: :assistant, content: 'duplicated response 2')) - end - end - - context 'when message content is nil' do - let(:options) do - { - tools_definitions: "tool definitions", - tool_names: "tool names", - user_input: user_input, - agent_scratchpad: "some observation", - conversation: [ - build(:ai_message, request_id: 'uuid1', role: 'user', content: 'question 1'), - build(:ai_message, request_id: 'uuid1', role: 'assistant', content: nil), - build(:ai_message, request_id: 'uuid1', role: 'user', content: 'question 2'), - build(:ai_message, 
request_id: 'uuid1', role: 'assistant', content: 'response 2') - ], - prompt_version: prompt_version, - current_code: "", - current_resource: "", - resources: "", - current_user: user, - zero_shot_prompt: zero_shot_prompt, - system_prompt: system_prompt, - unavailable_resources: '', - source_template: "source template" - } - end - - it 'removes messages with nil content and deduplicates roles' do - prompt = subject - - expect(prompt).to be_instance_of(Array) - expect(prompt).not_to include(hash_including(role: :user, content: 'question 1')) - expect(prompt).not_to include(hash_including(content: nil)) - expect(prompt).to include(hash_including(role: :user, content: 'question 2')) - expect(prompt).to include(hash_including(role: :assistant, content: 'response 2')) - end - end - end - - it_behaves_like 'zero shot prompt' -end diff --git a/ee/spec/lib/gitlab/llm/chain/gitlab_context_spec.rb b/ee/spec/lib/gitlab/llm/chain/gitlab_context_spec.rb index d19dc1b319fd9..798a73f863509 100644 --- a/ee/spec/lib/gitlab/llm/chain/gitlab_context_spec.rb +++ b/ee/spec/lib/gitlab/llm/chain/gitlab_context_spec.rb @@ -31,14 +31,6 @@ group.namespace_settings.update!(experiment_features_enabled: true) end - describe '#current_page_type' do - let(:resource) { create(:issue, project: project) } - - it 'delegates to ai resource' do - expect(context.current_page_type).to eq("issue") - end - end - describe '#resource_serialized' do let(:content_limit) { 500 } @@ -90,39 +82,20 @@ end end - describe '#current_page_short_description' do - context 'with an unauthorized resource' do - let(:resource) { create(:issue) } - - it 'returns nil' do - expect(context.current_page_short_description).to be_nil - end - end - - context 'with an authorized resource' do - let(:resource) { create(:issue, project: project) } - - it 'returns short description of issue' do - expect(context.current_page_short_description).to include("The title of the issue is '#{resource.title}'.") - end - end - end - - describe '#current_page_description' do + describe '#current_page_params' do context 'with an unauthorized resource' do let(:resource) { create(:issue) } it 'returns nil' do - expect(context.current_page_short_description).to be_nil + expect(context.current_page_params).to be_nil end end context 'with an authorized resource' do let(:resource) { create(:issue, project: project) } - it 'returns sentence about the resource' do - expect(context.current_page_short_description) - .to include("The user is currently on a page that displays an issue") + it 'returns resource params' do + expect(context.current_page_params).to include({ title: resource.title }) end end end diff --git a/ee/spec/lib/gitlab/llm/chain/streamed_zero_shot_answer_spec.rb b/ee/spec/lib/gitlab/llm/chain/streamed_zero_shot_answer_spec.rb deleted file mode 100644 index be63ec1191552..0000000000000 --- a/ee/spec/lib/gitlab/llm/chain/streamed_zero_shot_answer_spec.rb +++ /dev/null @@ -1,50 +0,0 @@ -# frozen_string_literal: true - -require 'fast_spec_helper' - -RSpec.describe Gitlab::Llm::Chain::StreamedZeroShotAnswer, feature_category: :duo_chat do - describe '#next_chunk' do - let(:streamed_answer) { described_class.new } - - context 'when stream is empty' do - it 'returns nil' do - expect(streamed_answer.next_chunk("")).to be_nil - end - end - - context 'when stream does not contain the final answer' do - it 'returns nil' do - expect(streamed_answer.next_chunk("Some")).to be_nil - expect(streamed_answer.next_chunk("Content")).to be_nil - end - end - - context 'when receiving 
thoughts and actions' do
-      it 'only returns the final answer', :aggregate_failures do
-        expect(streamed_answer.next_chunk("Thought: thought\n")).to be_nil
-        expect(streamed_answer.next_chunk("Action: IssueReader\n")).to be_nil
-        expect(streamed_answer.next_chunk("Final Answer: Hello")).to eq({ id: 1, content: "Hello" })
-      end
-    end
-
-    context 'when receiving a final answer split up in multiple tokens', :aggregate_failures do
-      it 'returns the final answer once it is ready', :aggregate_failures do
-        expect(streamed_answer.next_chunk("Final Answer")).to be_nil
-        expect(streamed_answer.next_chunk(": ")).to be_nil
-        expect(streamed_answer.next_chunk("Hello")).to eq({ id: 1, content: "Hello" })
-        expect(streamed_answer.next_chunk(" ")).to eq({ id: 2, content: " " })
-      end
-    end
-
-    context 'when receiving empty chunks', :aggregate_failures do
-      it 'skips them', :aggregate_failures do
-        expect(streamed_answer.next_chunk("Final Answer:")).to be_nil
-        expect(streamed_answer.next_chunk("")).to be_nil
-        expect(streamed_answer.next_chunk("Hello")).to eq({ id: 1, content: "Hello" })
-        expect(streamed_answer.next_chunk(" ")).to eq({ id: 2, content: " " })
-        expect(streamed_answer.next_chunk("")).to be_nil
-        expect(streamed_answer.next_chunk("World")).to eq({ id: 3, content: "World" })
-      end
-    end
-  end
-end
diff --git a/ee/spec/lib/gitlab/llm/chain/tool_response_modifier_spec.rb b/ee/spec/lib/gitlab/llm/chain/tool_response_modifier_spec.rb
deleted file mode 100644
index 237c2da54127b..0000000000000
--- a/ee/spec/lib/gitlab/llm/chain/tool_response_modifier_spec.rb
+++ /dev/null
@@ -1,19 +0,0 @@
-# frozen_string_literal: true
-
-require 'spec_helper'
-
-RSpec.describe Gitlab::Llm::Chain::ToolResponseModifier, feature_category: :shared do
-  let(:content) { ::Gitlab::Llm::Chain::Tools::GitlabDocumentation::Executor }
-
-  context 'on success' do
-    subject { described_class.new(content).response_body }
-
-    it { is_expected.to eq content::HUMAN_NAME }
-  end
-
-  context 'on error' do
-    subject { described_class.new(content).errors }
-
-    it { is_expected.to eq [] }
-  end
-end
diff --git a/ee/spec/lib/gitlab/llm/chain/utils/prompt_spec.rb b/ee/spec/lib/gitlab/llm/chain/utils/prompt_spec.rb
index ba3e755dc2b5c..595440a096558 100644
--- a/ee/spec/lib/gitlab/llm/chain/utils/prompt_spec.rb
+++ b/ee/spec/lib/gitlab/llm/chain/utils/prompt_spec.rb
@@ -132,12 +132,4 @@
       end
     end
   end
-
-  describe "#default_system_prompt" do
-    it 'includes the prompt to explain code directly' do
-      prompt = "You can explain code if the user provided a code snippet and answer directly."
-
-      expect(described_class.default_system_prompt).to include prompt
-    end
-  end
 end
diff --git a/ee/spec/models/ai/ai_resource/ci/build_spec.rb b/ee/spec/models/ai/ai_resource/ci/build_spec.rb
index ab4e9c520baef..92cefdf739b3f 100644
--- a/ee/spec/models/ai/ai_resource/ci/build_spec.rb
+++ b/ee/spec/models/ai/ai_resource/ci/build_spec.rb
@@ -31,13 +31,6 @@
     end
   end
 
-  describe '#current_page_short_description' do
-    it 'returns prompt' do
-      expect(wrapped_build.current_page_short_description)
-        .to include("The user is currently on a page that displays a ci build")
-    end
-  end
-
   describe '#current_page_params' do
     it 'returns params to construct prompt' do
       expect(wrapped_build.current_page_params.keys).to eq([:type])
diff --git a/ee/spec/models/ai/ai_resource/commit_spec.rb b/ee/spec/models/ai/ai_resource/commit_spec.rb
index 7b11ad67abc6e..910ec6db9c9f3 100644
--- a/ee/spec/models/ai/ai_resource/commit_spec.rb
+++ b/ee/spec/models/ai/ai_resource/commit_spec.rb
@@ -29,22 +29,4 @@
       expect(wrapped_commit.current_page_type).to eq('commit')
     end
   end
-
-  describe '#current_page_short_description' do
-    it 'returns prompt' do
-      expect(wrapped_commit.current_page_short_description)
-        .to include("The title of the commit is '#{commit.title}'.")
-    end
-
-    context 'with mr for chat feature flag disabled' do
-      before do
-        stub_feature_flags(ai_commit_reader_for_chat: false)
-      end
-
-      it 'returns empty string' do
-        expect(wrapped_commit.current_page_short_description)
-          .to eq("")
-      end
-    end
-  end
-end
diff --git a/ee/spec/models/ai/ai_resource/epic_spec.rb b/ee/spec/models/ai/ai_resource/epic_spec.rb
index 4643cc5a95706..2a77646c74f63 100644
--- a/ee/spec/models/ai/ai_resource/epic_spec.rb
+++ b/ee/spec/models/ai/ai_resource/epic_spec.rb
@@ -23,14 +23,6 @@
     end
   end
 
-  describe '#current_page_short_description' do
-    it 'returns prompt' do
-      expect(wrapped_epic.current_page_short_description).to include("The title of the epic is '#{epic.title}'.")
-      expect(wrapped_epic.current_page_short_description)
-        .not_to include("utilize it instead of using the 'EpicReader' tool")
-    end
-  end
-
   describe '#current_page_type' do
     it 'returns type' do
       expect(wrapped_epic.current_page_type).to eq('epic')
diff --git a/ee/spec/models/ai/ai_resource/issue_spec.rb b/ee/spec/models/ai/ai_resource/issue_spec.rb
index a8e42bb5baf9e..1dad522b48b3c 100644
--- a/ee/spec/models/ai/ai_resource/issue_spec.rb
+++ b/ee/spec/models/ai/ai_resource/issue_spec.rb
@@ -22,14 +22,6 @@
     end
   end
 
-  describe '#current_page_short_description' do
-    it 'returns prompt' do
-      expect(wrapped_issue.current_page_short_description).to include("The title of the issue is '#{issue.title}'.")
-      expect(wrapped_issue.current_page_short_description)
-        .not_to include("utilize it instead of using the 'IssueReader' tool")
-    end
-  end
-
   describe '#current_page_type' do
     it 'returns type' do
       expect(wrapped_issue.current_page_type).to eq('issue')
diff --git a/ee/spec/models/ai/ai_resource/merge_request_spec.rb b/ee/spec/models/ai/ai_resource/merge_request_spec.rb
index 4be019193a3cd..b3a05cf990222 100644
--- a/ee/spec/models/ai/ai_resource/merge_request_spec.rb
+++ b/ee/spec/models/ai/ai_resource/merge_request_spec.rb
@@ -28,11 +28,4 @@
       expect(wrapped_merge_request.current_page_type).to eq('merge_request')
     end
   end
-
-  describe '#current_page_short_description' do
-    it 'returns prompt' do
-      expect(wrapped_merge_request.current_page_short_description)
-        .to include("The title of the merge request is '#{merge_request.title}'.")
-    end
-  end
-end
diff --git a/scripts/duo_chat/reporter.rb b/scripts/duo_chat/reporter.rb
deleted file mode 100755
index 15df28168b7a9..0000000000000
--- a/scripts/duo_chat/reporter.rb
+++ /dev/null
@@ -1,308 +0,0 @@
-#!/usr/bin/env ruby
-# frozen_string_literal: true
-
-# We need to take some precautions when using the `gitlab` gem in this project.
-#
-# See https://docs.gitlab.com/ee/development/pipelines/internals.html#using-the-gitlab-ruby-gem-in-the-canonical-project.
-require 'gitlab'
-require 'json'
-
-class Reporter
-  GITLAB_COM_API_V4_ENDPOINT = "https://gitlab.com/api/v4"
-  QA_EVALUATION_PROJECT_ID = 52020045 # https://gitlab.com/gitlab-org/ai-powered/ai-framework/qa-evaluation
-  AGGREGATED_REPORT_ISSUE_IID = 1 # https://gitlab.com/gitlab-org/ai-powered/ai-framework/qa-evaluation/-/issues/1
-  IDENTIFIABLE_NOTE_TAG = 'gitlab-org/ai-powered/ai-framework:duo-chat-qa-evaluation'
-
-  GRADE_TO_EMOJI_MAPPING = {
-    correct: ":white_check_mark:",
-    incorrect: ":x:",
-    unexpected: ":warning:"
-  }.freeze
-
-  def run
-    if pipeline_running_on_master_branch?
-      snippet_web_url = upload_data_as_snippet
-      report_issue_url = create_report_issue
-      update_aggregation_issue(report_issue_url, snippet_web_url)
-    else
-      save_report_as_artifact
-      post_or_update_report_note
-    end
-  end
-
-  def markdown_report
-    @report ||= <<~MARKDOWN
-      <!-- #{IDENTIFIABLE_NOTE_TAG} -->
-
-      ## GitLab Duo Chat QA evaluation
-
-      Report generated for "#{ENV['CI_JOB_NAME']}". This report is generated and refreshed automatically. Do not edit.
-
-      LLMs have been asked to evaluate GitLab Duo Chat's answers.
-      - :white_check_mark: : LLM evaluated the answer as `CORRECT`.
-      - :x: : LLM evaluated the answer as `INCORRECT`.
-      - :warning: : LLM did not evaluate correctly or the evaluation request might have failed.
-
-      ### Summary
-
-      - The total number of evaluations: #{summary_numbers[:total]}
-
-      - The number of evaluations in which all LLMs graded `CORRECT`: #{summary_numbers[:correct]} (#{summary_numbers[:correct_ratio]}%)
-
-        - Note: if an evaluation request failed or its response was not parsable, it was ignored. For example, :white_check_mark: :warning: would count as `CORRECT`.
-
-      - The number of evaluations in which all LLMs graded `INCORRECT`: #{summary_numbers[:incorrect]} (#{summary_numbers[:incorrect_ratio]}%)
-
-        - Note: if an evaluation request failed or its response was not parsable, it was ignored. For example, :x: :warning: would count as `INCORRECT`.
-
-      - The number of evaluations in which LLMs disagreed: #{summary_numbers[:disagreed]} (#{summary_numbers[:disagreed_ratio]}%)
-
-
-      ### Evaluations
-
-      #{eval_content}
-
-
-    MARKDOWN
-
-    # Do this to avoid pinging users in notes/issues.
-    quote_usernames(@report)
-  end
-
-  private
-
-  def quote_usernames(text)
-    text.gsub(/(@\w+)/, '`\\1`')
-  end
-
-  def pipeline_running_on_master_branch?
-    ENV['CI_COMMIT_BRANCH'] == ENV['CI_DEFAULT_BRANCH']
-  end
-
-  def utc_timestamp
-    @utc_timestamp ||= Time.now.utc
-  end
-
-  def upload_data_as_snippet
-    filename = "#{utc_timestamp.to_i}.json"
-    title = utc_timestamp.to_s
-    snippet_content = ::JSON.pretty_generate({
-      commit: ENV["CI_COMMIT_SHA"],
-      pipeline_url: ENV["CI_PIPELINE_URL"],
-      data: report_data
-    })
-
-    puts "Creating a snippet #{filename}."
-    snippet = qa_evaluation_project_client.create_snippet(
-      QA_EVALUATION_PROJECT_ID,
-      {
-        title: title,
-        files: [{ file_path: filename, content: snippet_content }],
-        visibility: 'private'
-      }
-    )
-
-    snippet.web_url
-  end
-
-  def create_report_issue
-    puts "Creating a report issue."
- issue_title = "Report #{utc_timestamp}" - new_issue = qa_evaluation_project_client.create_issue( - QA_EVALUATION_PROJECT_ID, issue_title, { description: markdown_report } - ) - - new_issue.web_url - end - - def update_aggregation_issue(report_issue_url, snippet_web_url) - puts "Updating the aggregated report issue." - - new_line = ["\n|"] - new_line << "#{utc_timestamp} |" - new_line << "#{summary_numbers[:total]} |" - new_line << "#{summary_numbers[:correct_ratio]}% |" - new_line << "#{summary_numbers[:incorrect_ratio]}% |" - new_line << "#{summary_numbers[:disagreed_ratio]}% |" - new_line << "#{report_issue_url} |" - new_line << "#{snippet_web_url} |" - new_line = new_line.join(' ') - - aggregated_report_issue = qa_evaluation_project_client.issue(QA_EVALUATION_PROJECT_ID, AGGREGATED_REPORT_ISSUE_IID) - updated_description = aggregated_report_issue.description + new_line - qa_evaluation_project_client.edit_issue( - QA_EVALUATION_PROJECT_ID, AGGREGATED_REPORT_ISSUE_IID, { description: updated_description } - ) - end - - def save_report_as_artifact - artifact_path = File.join(base_dir, ENV['QA_EVAL_REPORT_FILENAME']) - - puts "Saving #{artifact_path}" - File.write(artifact_path, markdown_report) - end - - def post_or_update_report_note - note = existing_report_note - if note && note.type != 'DiscussionNote' - # The latest note has not led to a discussion. Update it. - gitlab_project_client.edit_merge_request_note(ci_project_id, merge_request_iid, note.id, markdown_report) - - puts "Updated comment." - else - # This is the first note or the latest note has been discussed on the MR. - # Don't update, create new note instead. - gitlab_project_client.create_merge_request_note(ci_project_id, merge_request_iid, markdown_report) - - puts "Posted comment." - end - end - - def existing_report_note - # Look for an existing note using `IDENTIFIABLE_NOTE_TAG` - gitlab_project_client - .merge_request_notes(ci_project_id, merge_request_iid) - .auto_paginate - .select { |note| note.body.include? IDENTIFIABLE_NOTE_TAG } - .max_by { |note| Time.parse(note.created_at) } - end - - def gitlab_project_client - @gitlab_project_client ||= Gitlab.client( - endpoint: GITLAB_COM_API_V4_ENDPOINT, - private_token: ENV['PROJECT_TOKEN_FOR_CI_SCRIPTS_API_USAGE'] - ) - end - - def qa_evaluation_project_client - @qa_evaluation_project_client ||= Gitlab.client( - endpoint: GITLAB_COM_API_V4_ENDPOINT, - private_token: ENV['CHAT_QA_EVALUATION_PROJECT_TOKEN_FOR_CI_SCRIPTS_API_USAGE'] - ) - end - - def base_dir - ENV['CI_PROJECT_DIR'] || "./" - end - - def merge_request_iid - ENV['CI_MERGE_REQUEST_IID'] - end - - def ci_project_id - ENV['CI_PROJECT_ID'] - end - - def report_data - @report_data ||= Dir[File.join(base_dir, "tmp/duo_chat/qa*.json")] - .flat_map { |file| JSON.parse(File.read(file)) } - end - - def eval_content - report_data - .sort_by { |a| a["question"] } - .map do |data| - <<~MARKDOWN - <details> - - <summary> - - #{correctness_indicator(data)} - - `"#{data['question']}"` - - (context: `#{data['resource']}`) - - </summary> - - #### Resource - - `#{data['resource']}` - - #### Answer - - #{data['answer']} - - #### LLM Evaluation - - Tools used: #{data['tools_used']} - - #{evalutions(data)} - - - </details> - - MARKDOWN - end - .join - end - - def summary_numbers - @graded_evaluations ||= report_data - .map { |data| data["evaluations"].map { |eval| parse_grade(eval) } } - .reject { |grades| !(grades.include? :correct) && !(grades.include? 
:incorrect) } - - total = @graded_evaluations.size - correct = @graded_evaluations.count { |grades| !(grades.include? :incorrect) } - incorrect = @graded_evaluations.count { |grades| !(grades.include? :correct) } - disagreed = @graded_evaluations.count { |grades| (grades.include? :correct) && (grades.include? :incorrect) } - - { - total: total, - correct: correct, - correct_ratio: (correct.to_f / total * 100).round(1), - incorrect: incorrect, - incorrect_ratio: (incorrect.to_f / total * 100).round(1), - disagreed: disagreed, - disagreed_ratio: (disagreed.to_f / total * 100).round(1) - } - end - - def parse_grade(eval) - return :correct if eval["response"].match?(/Grade: CORRECT/i) - return :incorrect if eval["response"].match?(/Grade: INCORRECT/i) - - # If the LLM's evaluation includes neither CORRECT nor CORRECT, flag it. - :unexpected - end - - def correctness_indicator(data) - data["evaluations"].map { |eval| parse_grade(eval) }.map { |grade| GRADE_TO_EMOJI_MAPPING[grade] }.join(' ') - end - - def evalutions(data) - rows = data["evaluations"].map do |eval| - grade = parse_grade(eval) - - <<~MARKDOWN - <tr> - <td>#{eval['model']}</td> - <td> - #{GRADE_TO_EMOJI_MAPPING[grade]} - </td> - <td> - #{eval['response']} - </td - </tr> - - MARKDOWN - end - .join - - <<~MARKDOWN - <table> - <tr> - <td>Model</td> - <td>Grade</td> - <td>Details</td> - </tr> - #{rows} - </table> - MARKDOWN - end -end - -Reporter.new.run if $PROGRAM_NAME == __FILE__ diff --git a/spec/scripts/duo_chat/reporter_spec.rb b/spec/scripts/duo_chat/reporter_spec.rb deleted file mode 100644 index 8f778416c0d2d..0000000000000 --- a/spec/scripts/duo_chat/reporter_spec.rb +++ /dev/null @@ -1,270 +0,0 @@ -# frozen_string_literal: true - -require 'fast_spec_helper' -require 'gitlab' -require 'json' -require_relative '../../../scripts/duo_chat/reporter' - -RSpec.describe Reporter, feature_category: :ai_abstraction_layer do - subject(:reporter) { described_class.new } - - describe '#run', :freeze_time do - let(:ci_commit_sha) { 'commitsha' } - let(:ci_pipeline_url) { 'https://gitlab.com/pipeline/url' } - let(:client) { double } - - before do - stub_env('CI_COMMIT_SHA', ci_commit_sha) - stub_env('CI_PIPELINE_URL', ci_pipeline_url) - stub_env('CI_COMMIT_BRANCH', ci_commit_branch) - stub_env('CI_DEFAULT_BRANCH', ci_default_branch) - - allow(Gitlab).to receive(:client).and_return(client) - end - - context 'when the CI pipeline is running with the commit in `master` branch' do - let(:ci_commit_branch) { 'master' } - let(:ci_default_branch) { 'master' } - let(:snippet_web_url) { 'https://gitlab.com/snippet/url' } - let(:issue_web_url) { 'https://gitlab.com/issue/url' } - - let(:mock_data) do - [ - { - "question" => "question1", - "resource" => "resource", - "answer" => "answer1", - "tools_used" => ["foobar tool"], - "evaluations" => [ - { "model" => "claude-2", "response" => "Grade: CORRECT" }, - { "model" => "text-bison", "response" => "Grade: CORRECT" } - ] - } - ] - end - - before do - allow(reporter).to receive(:report_data).and_return(mock_data) - end - - it 'uploads snippet, creates a report issue and updates the tracking issue' do - # Uploads the test data as a snippet along with commit sha and pipeline url - snippet = double(web_url: snippet_web_url) # rubocop: disable RSpec/VerifiedDoubles -- an internal detail of Gitlab gem. 
-        snippet_content = ::JSON.pretty_generate({
-          commit: ci_commit_sha,
-          pipeline_url: ci_pipeline_url,
-          data: mock_data
-        })
-
-        expect(client).to receive(:create_snippet).with(
-          described_class::QA_EVALUATION_PROJECT_ID,
-          {
-            title: Time.now.utc.to_s,
-            files: [{ file_path: "#{Time.now.utc.to_i}.json", content: snippet_content }],
-            visibility: 'private'
-          }
-        ).and_return(snippet)
-
-        # Create a new issue for the report
-        issue_title = "Report #{Time.now.utc}"
-        issue = double(web_url: issue_web_url) # rubocop: disable RSpec/VerifiedDoubles -- an internal detail of Gitlab gem.
-
-        expect(client).to receive(:create_issue).with(
-          described_class::QA_EVALUATION_PROJECT_ID,
-          issue_title,
-          { description: reporter.markdown_report }
-        ).and_return(issue)
-
-        # Updates the tracking issue by adding a row that links to the snippet and the issue just created.
-        aggregated_report_issue = double(description: "") # rubocop: disable RSpec/VerifiedDoubles -- an internal detail of Gitlab gem.
-        allow(client).to receive(:issue).with(
-          described_class::QA_EVALUATION_PROJECT_ID,
-          described_class::AGGREGATED_REPORT_ISSUE_IID
-        ).and_return(aggregated_report_issue)
-        row = "\n| #{Time.now.utc} | 1 | 100.0% | 0.0% | 0.0%"
-        row << " | #{issue_web_url} | #{snippet_web_url} |"
-
-        expect(client).to receive(:edit_issue).with(
-          described_class::QA_EVALUATION_PROJECT_ID,
-          described_class::AGGREGATED_REPORT_ISSUE_IID,
-          { description: aggregated_report_issue.description + row }
-        )
-
-        reporter.run
-      end
-    end
-
-    context 'when the CI pipeline is not running with the commit in `master` branch' do
-      let(:ci_commit_branch) { 'foobar' }
-      let(:ci_default_branch) { 'master' }
-      let(:qa_eval_report_filename) { 'report.md' }
-      let(:merge_request_iid) { "123" }
-      let(:ci_project_id) { "456" }
-      let(:ci_project_dir) { "/builds/gitlab-org/gitlab" }
-      let(:base_dir) { "#{ci_project_dir}/#{qa_eval_report_filename}" }
-
-      before do
-        stub_env('QA_EVAL_REPORT_FILENAME', qa_eval_report_filename)
-        stub_env('CI_MERGE_REQUEST_IID', merge_request_iid)
-        stub_env('CI_PROJECT_ID', ci_project_id)
-        stub_env('CI_PROJECT_DIR', ci_project_dir)
-      end
-
-      context 'when a note does not already exist' do
-        let(:note) { nil }
-
-        it 'saves the report as a markdown file and creates a new MR note containing the report content' do
-          expect(File).to receive(:write).with(base_dir, reporter.markdown_report)
-
-          allow(reporter).to receive(:existing_report_note).and_return(note)
-          expect(client).to receive(:create_merge_request_note).with(
-            ci_project_id,
-            merge_request_iid,
-            reporter.markdown_report
-          )
-
-          reporter.run
-        end
-      end
-
-      context 'when a note exists' do
-        let(:note_id) { "1" }
-        let(:note) { double(id: note_id, type: "Note") } # rubocop: disable RSpec/VerifiedDoubles -- an internal detail of Gitlab gem.
-
-        it 'saves the report as a markdown file and updates the existing MR note containing the report content' do
-          expect(File).to receive(:write).with(base_dir, reporter.markdown_report)
-
-          allow(reporter).to receive(:existing_report_note).and_return(note)
-          expect(client).to receive(:edit_merge_request_note).with(
-            ci_project_id,
-            merge_request_iid,
-            note_id,
-            reporter.markdown_report
-          )
-
-          reporter.run
-        end
-      end
-    end
-  end
-
-  describe '#markdown_report' do
-    let(:mock_data) do
-      [
-        {
-          "question" => "question1",
-          "resource" => "resource",
-          "answer" => "answer1",
-          "tools_used" => ["foobar tool"],
-          "evaluations" => [
-            { "model" => "claude-2", "response" => "Grade: CORRECT" },
-            { "model" => "text-bison", "response" => "Grade: CORRECT" }
-          ]
-        },
-        {
-          "question" => "question2",
-          "resource" => "resource",
-          "answer" => "answer2",
-          "tools_used" => [],
-          "evaluations" => [
-            { "model" => "claude-2", "response" => " Grade: INCORRECT" },
-            { "model" => "text-bison", "response" => "Grade: INCORRECT" }
-          ]
-        },
-        {
-          "question" => "question3",
-          "resource" => "resource",
-          "answer" => "answer3",
-          "tools_used" => [],
-          "evaluations" => [
-            { "model" => "claude-2", "response" => " Grade: CORRECT" },
-            { "model" => "text-bison", "response" => "Grade: INCORRECT" }
-          ]
-        },
-        {
-          "question" => "question4",
-          "resource" => "resource",
-          "answer" => "answer4",
-          "tools_used" => [],
-          # Note: The first evaluation (claude-2) is considered invalid and ignored.
-          "evaluations" => [
-            { "model" => "claude-2", "response" => "???" },
-            { "model" => "text-bison", "response" => "Grade: CORRECT" }
-          ]
-        },
-        {
-          "question" => "question5",
-          "resource" => "resource",
-          "answer" => "answer5",
-          "tools_used" => [],
-          # Note: The second evaluation (text-bison) is considered invalid and ignored.
-          "evaluations" => [
-            { "model" => "claude-2", "response" => " Grade: INCORRECT" },
-            { "model" => "text-bison", "response" => "???" }
-          ]
-        },
-        {
-          "question" => "question6",
-          "resource" => "resource",
-          "answer" => "answer6",
-          "tools_used" => [],
-          # Note: Both evaluations are invalid as they contain neither `CORRECT` nor `INCORRECT`.
-          # It should be ignored in the report.
-          "evaluations" => [
-            { "model" => "claude-2", "response" => "???" },
-            { "model" => "text-bison", "response" => "???" }
-          ]
-        }
-      ]
-    end
-
-    before do
-      allow(reporter).to receive(:report_data).and_return(mock_data)
-    end
-
-    it "generates the correct summary stats and uses the correct emoji indicators" do
-      expect(reporter.markdown_report).to include "The total number of evaluations: 5"
-
-      expect(reporter.markdown_report).to include "all LLMs graded `CORRECT`: 2 (40.0%)"
-      expect(reporter.markdown_report).to include ":white_check_mark: :white_check_mark:"
-      expect(reporter.markdown_report).to include ":warning: :white_check_mark:"
-
-      expect(reporter.markdown_report).to include "all LLMs graded `INCORRECT`: 2 (40.0%)"
-      expect(reporter.markdown_report).to include ":x: :x:"
-      expect(reporter.markdown_report).to include ":x: :warning:"
-
-      expect(reporter.markdown_report).to include "in which LLMs disagreed: 1 (20.0%)"
-      expect(reporter.markdown_report).to include ":white_check_mark: :x:"
-    end
-
-    it "includes the tools used" do
-      expect(reporter.markdown_report).to include "[\"foobar tool\"]"
-    end
-
-    context 'when usernames are present' do
-      let(:mock_data) do
-        [
-          {
-            "question" => "@user's @root?",
-            "resource" => "resource",
-            "answer" => "@user2 and @user3",
-            "tools_used" => ["foobar tool"],
-            "evaluations" => [
-              { "model" => "claude-2", "response" => "Grade: CORRECT\n\n@user4" },
-              { "model" => "text-bison", "response" => "Grade: CORRECT\n\n@user5" }
-            ]
-          }
-        ]
-      end
-
-      it 'quotes the usernames with backticks' do
-        expect(reporter.markdown_report).to include "`@root`"
-        expect(reporter.markdown_report).to include "`@user`"
-        expect(reporter.markdown_report).to include "`@user2`"
-        expect(reporter.markdown_report).to include "`@user3`"
-        expect(reporter.markdown_report).to include "`@user4`"
-        expect(reporter.markdown_report).to include "`@user5`"
-      end
-    end
-  end
-end
diff --git a/spec/support/known_rspec_metadata_keys.yml b/spec/support/known_rspec_metadata_keys.yml
index 01abb779e119a..8386cc9c81a9a 100644
--- a/spec/support/known_rspec_metadata_keys.yml
+++ b/spec/support/known_rspec_metadata_keys.yml
@@ -10,7 +10,6 @@
 - :block
 - :broken_storage
 - :capybara_ignore_server_errors
-- :chat_qa_evaluation
 - :ci_config_validation
 - :clean_gitlab_redis_buffered_counter
 - :clean_gitlab_redis_cache
@@ -54,7 +53,6 @@
 - :experiment
 - :factory_default
 - :fails_if_sidekiq_not_configured
-- :fast_chat_qa_evaluation
 - :feature
 - :feature_category
 - :features
@@ -168,6 +166,5 @@
 - :with_sidekiq_context
 - :without_license
 - :yaml_processor_feature_flag_corectness
-- :zeroshot_executor
 - :zoekt
 - :zoekt_settings_enabled
-- 
GitLab