From e1d788af0d0f2c742ec209c75605f23371a9ccff Mon Sep 17 00:00:00 2001 From: Tian Gao <tgao@gitlab.com> Date: Wed, 2 Oct 2024 22:13:29 +0000 Subject: [PATCH] Migrate prompt from rails to AIGW for anthropic In this MR we migrate params for context_block, libraries_block, user_instruction block to AIGW. --- .../code_generation/ai_gateway_messages.rb | 66 ++++++- .../ai_gateway_messages_spec.rb | 36 ++++ ...mples.rb => ai_gateway_shared_examples.rb} | 177 +++++++++++++++++- .../anthropic_messages_spec.rb | 4 +- .../tasks/code_generation_spec.rb | 90 +++++---- ee/spec/requests/api/code_suggestions_spec.rb | 8 +- 6 files changed, 324 insertions(+), 57 deletions(-) create mode 100644 ee/spec/lib/code_suggestions/prompts/code_generation/ai_gateway_messages_spec.rb rename ee/spec/lib/code_suggestions/prompts/code_generation/{anthropic_shared_examples.rb => ai_gateway_shared_examples.rb} (83%) diff --git a/ee/lib/code_suggestions/prompts/code_generation/ai_gateway_messages.rb b/ee/lib/code_suggestions/prompts/code_generation/ai_gateway_messages.rb index 176c9f5237f0..29a42f4d9e0d 100644 --- a/ee/lib/code_suggestions/prompts/code_generation/ai_gateway_messages.rb +++ b/ee/lib/code_suggestions/prompts/code_generation/ai_gateway_messages.rb @@ -12,7 +12,6 @@ class AiGatewayMessages < CodeSuggestions::Prompts::Base # response time grows with prompt size, so we don't use upper limit size of prompt window MAX_INPUT_CHARS = 50000 GATEWAY_PROMPT_VERSION = 3 - CONTENT_TYPES = { file: 'file', snippet: 'snippet' }.freeze def request_params { @@ -37,7 +36,10 @@ def request_params def code_generation_enhancer { **examples_section_params, - **existing_code_block_params + **existing_code_block_params, + **context_block_params, + **libraries_block_params, + **user_instruction_params } end @@ -50,8 +52,6 @@ def examples_section_params end def existing_code_block_params - return {} unless params[:prefix].present? - trimmed_prefix = prefix.to_s.last(MAX_INPUT_CHARS) trimmed_suffix = suffix.to_s.first(MAX_INPUT_CHARS - trimmed_prefix.size) @@ -60,6 +60,64 @@ def existing_code_block_params trimmed_suffix: trimmed_suffix } end + + def context_block_params + related_files = [] + related_snippets = [] + + params[:context]&.each do |context| + if context[:type] == ::Ai::AdditionalContext::CODE_SUGGESTIONS_CONTEXT_TYPES[:file] + related_files << <<~FILE_CONTENT + <file_content file_name="#{context[:name]}"> + #{context[:content]} + </file_content> + FILE_CONTENT + elsif context[:type] == ::Ai::AdditionalContext::CODE_SUGGESTIONS_CONTEXT_TYPES[:snippet] + related_snippets << <<~SNIPPET_CONTENT + <snippet_content name="#{context[:name]}"> + #{context[:content]} + </snippet_content> + SNIPPET_CONTENT + end + end + + { + related_files: related_files, + related_snippets: related_snippets + } + end + + def libraries_block_params + if libraries.any? + Gitlab::InternalEvents.track_event( + 'include_repository_xray_data_into_code_generation_prompt', + project: params[:project], + namespace: params[:project]&.namespace, + user: params[:current_user] + ) + end + + { libraries: libraries } + end + + def libraries + return [] unless xray_report + + xray_report.libs.map { |l| l['name'] } # rubocop:disable Rails/Pluck -- libs is an array + end + strong_memoize_attr :libraries + + def xray_report + ::Projects::XrayReport.for_project(params[:project]).for_lang(language.x_ray_lang).first + end + strong_memoize_attr :xray_report + + def user_instruction_params + instruction = params[:instruction]&.instruction.presence || + 'Generate the best possible code based on instructions.' + + { user_instruction: instruction } + end end end end diff --git a/ee/spec/lib/code_suggestions/prompts/code_generation/ai_gateway_messages_spec.rb b/ee/spec/lib/code_suggestions/prompts/code_generation/ai_gateway_messages_spec.rb new file mode 100644 index 000000000000..ec60784bcda0 --- /dev/null +++ b/ee/spec/lib/code_suggestions/prompts/code_generation/ai_gateway_messages_spec.rb @@ -0,0 +1,36 @@ +# frozen_string_literal: true + +require 'spec_helper' +require_relative 'ai_gateway_shared_examples' + +RSpec.describe CodeSuggestions::Prompts::CodeGeneration::AiGatewayMessages, feature_category: :code_suggestions do + let(:prompt_version) { 3 } + + it_behaves_like 'code generation AI Gateway request params' do + def expected_request_params + { + prompt_components: [ + { + type: 'code_editor_generation', + payload: { + file_name: expected_file_name, + content_above_cursor: expected_content_above_cursor, + content_below_cursor: expected_content_below_cursor, + language_identifier: expected_language_identifier, + prompt_id: 'code_suggestions/generations', + prompt_enhancer: { + examples_array: expected_examples_array, + trimmed_prefix: expected_trimmed_prefix, + trimmed_suffix: expected_trimmed_suffix, + related_files: expected_related_files, + related_snippets: expected_related_snippets, + libraries: expected_libraries, + user_instruction: expected_user_instruction + } + } + } + ] + } + end + end +end diff --git a/ee/spec/lib/code_suggestions/prompts/code_generation/anthropic_shared_examples.rb b/ee/spec/lib/code_suggestions/prompts/code_generation/ai_gateway_shared_examples.rb similarity index 83% rename from ee/spec/lib/code_suggestions/prompts/code_generation/anthropic_shared_examples.rb rename to ee/spec/lib/code_suggestions/prompts/code_generation/ai_gateway_shared_examples.rb index cf7a4a23cc1b..87274e8de721 100644 --- a/ee/spec/lib/code_suggestions/prompts/code_generation/anthropic_shared_examples.rb +++ b/ee/spec/lib/code_suggestions/prompts/code_generation/ai_gateway_shared_examples.rb @@ -1,6 +1,7 @@ # frozen_string_literal: true -RSpec.shared_examples 'anthropic prompt' do +# AIGW v2 api for code generation receives prompt +RSpec.shared_examples 'code generation AI Gateway request prompt' do let(:language) { instance_double(CodeSuggestions::ProgrammingLanguage, x_ray_lang: x_ray_lang) } let(:language_name) { 'Go' } let(:x_ray_lang) { nil } @@ -316,19 +317,19 @@ context 'when context is available' do let(:main_go_content) do <<~CONTENT - package main + package main - func main() - fullName("John", "Doe") - } + func main() + fullName("John", "Doe") + } CONTENT end let(:full_name_func_content) do <<~CONTENT - func fullName(first, last string) { - fmt.Println(first, last) - } + func fullName(first, last string) { + fmt.Println(first, last) + } CONTENT end @@ -615,3 +616,163 @@ end end end + +# AIGW v3 api for code generation receives params +RSpec.shared_examples 'code generation AI Gateway request params' do + let_it_be(:current_user) { create(:user) } + let_it_be(:project) { create(:project) } + + let(:file_name) { 'main.go' } + let(:content_above_cursor) { "package main\n\nimport \"fmt\"\n\nfunc main() {\n" } + let(:content_below_cursor) { "func test() {\n" } + let(:comment) { 'My comment instructions' } + let(:instruction) { instance_double(CodeSuggestions::Instruction, instruction: comment, trigger_type: 'comment') } + let(:examples) { [{ example: 'func hello() {', response: 'func hello() {<new_code>fmt.Println("hello")' }] } + + let(:context) do + [ + { type: 'file', name: 'main.go', content: + <<~CONTENT + package main + + func main() + fullName("John", "Doe") + } + CONTENT + }, + { type: 'snippet', name: 'fullName', content: + <<~CONTENT + func fullName(first, last string) { + fmt.Println(first, last) + } + CONTENT + } + ] + end + + let(:current_file_params) do + { + file_name: file_name, + content_above_cursor: content_above_cursor, + content_below_cursor: content_below_cursor + } + end + + let(:params) do + { + current_user: current_user, + project: project, + instruction: instruction, + current_file: current_file_params, + context: context + } + end + + subject { described_class.new(params) } + + describe '#request_params' do + context 'when all parameters are present' do + before_all do + create(:xray_report, lang: 'go', project: project, + payload: { libs: [{ name: 'zlib (1.2.3)' }, { name: 'boost (2.0.0)' }] }) + end + + let(:expected_file_name) { file_name } + let(:expected_content_above_cursor) { content_above_cursor } + let(:expected_content_below_cursor) { content_below_cursor } + let(:expected_language_identifier) { 'Go' } + let(:expected_examples_array) { examples } + let(:expected_trimmed_prefix) { content_above_cursor } + let(:expected_trimmed_suffix) { content_below_cursor } + let(:expected_libraries) { ['zlib (1.2.3)', 'boost (2.0.0)'] } + let(:expected_user_instruction) { comment } + + let(:expected_related_files) do + [ + "<file_content file_name=\"main.go\">\npackage main\n\nfunc main()\n " \ + "fullName(\"John\", \"Doe\")\n}\n\n</file_content>\n" + ] + end + + let(:expected_related_snippets) do + [ + "<snippet_content name=\"fullName\">\nfunc fullName(first, last string) {\n " \ + "fmt.Println(first, last)\n}\n\n</snippet_content>\n" + ] + end + + before do + allow_next_instance_of(CodeSuggestions::ProgrammingLanguage) do |instance| + allow(instance).to receive(:generation_examples).with(type: instruction.trigger_type).and_return(examples) + end + end + + it 'returns expected request params' do + expect(subject.request_params).to eq(expected_request_params) + end + + it 'tracks an X-Ray event' do + expect(Gitlab::InternalEvents).to receive(:track_event).with( + 'include_repository_xray_data_into_code_generation_prompt', + project: project, + namespace: project.namespace, + user: current_user + ) + + subject.request_params + end + + context 'when the prefix length exceeds the prompt limit' do + let(:limit) { 10 } + let(:expected_trimmed_prefix) { content_above_cursor.last(limit) } + let(:expected_trimmed_suffix) { '' } + + before do + stub_const('CodeSuggestions::Prompts::CodeGeneration::AiGatewayMessages::MAX_INPUT_CHARS', limit) + end + + it 'returns expected request params' do + expect(subject.request_params).to eq(expected_request_params) + end + + context 'when the combined prefix and suffix length exceeds the prompt limit' do + let(:limit) { content_above_cursor.size + 5 } + let(:expected_trimmed_prefix) { content_above_cursor } + let(:expected_trimmed_suffix) { content_below_cursor.first(5) } + + it 'returns expected request params' do + expect(subject.request_params).to eq(expected_request_params) + end + end + end + end + + context 'when all parameters are blank' do + let(:instruction) { nil } + let(:context) { nil } + let(:current_file_params) { nil } + + let(:expected_file_name) { '' } + let(:expected_content_above_cursor) { nil } + let(:expected_content_below_cursor) { nil } + let(:expected_language_identifier) { '' } + let(:expected_examples_array) { [] } + let(:expected_trimmed_prefix) { '' } + let(:expected_trimmed_suffix) { '' } + let(:expected_libraries) { [] } + let(:expected_user_instruction) { 'Generate the best possible code based on instructions.' } + let(:expected_related_files) { [] } + let(:expected_related_snippets) { [] } + + it 'returns expected request params' do + expect(subject.request_params).to eq(expected_request_params) + end + + it 'does not track an X-Ray event' do + expect(Gitlab::InternalEvents).not_to receive(:track_event) + + subject.request_params + end + end + end +end diff --git a/ee/spec/lib/code_suggestions/prompts/code_generation/anthropic_messages_spec.rb b/ee/spec/lib/code_suggestions/prompts/code_generation/anthropic_messages_spec.rb index dda9504c2cae..00ac633ac452 100644 --- a/ee/spec/lib/code_suggestions/prompts/code_generation/anthropic_messages_spec.rb +++ b/ee/spec/lib/code_suggestions/prompts/code_generation/anthropic_messages_spec.rb @@ -1,12 +1,12 @@ # frozen_string_literal: true require 'spec_helper' -require_relative 'anthropic_shared_examples' +require_relative 'ai_gateway_shared_examples' RSpec.describe CodeSuggestions::Prompts::CodeGeneration::AnthropicMessages, feature_category: :code_suggestions do let(:prompt_version) { 3 } - it_behaves_like 'anthropic prompt' do + it_behaves_like 'code generation AI Gateway request prompt' do def expected_prompt [ { role: :system, content: system_prompt }, diff --git a/ee/spec/lib/code_suggestions/tasks/code_generation_spec.rb b/ee/spec/lib/code_suggestions/tasks/code_generation_spec.rb index be242d1855a4..849816929a47 100644 --- a/ee/spec/lib/code_suggestions/tasks/code_generation_spec.rb +++ b/ee/spec/lib/code_suggestions/tasks/code_generation_spec.rb @@ -44,30 +44,34 @@ let(:anthropic_request_params) do { - "prompt_components" => [ + 'prompt_components' => [ { - "type" => "code_editor_generation", - "payload" => { - "file_name" => "test.py", - "content_above_cursor" => "some prefix", - "content_below_cursor" => "some suffix", - "language_identifier" => "Python", - "prompt_id" => "code_suggestions/generations", - "prompt_enhancer" => { - "examples_array" => [ + 'type' => 'code_editor_generation', + 'payload' => { + 'file_name' => 'test.py', + 'content_above_cursor' => 'some prefix', + 'content_below_cursor' => 'some suffix', + 'language_identifier' => 'Python', + 'prompt_id' => 'code_suggestions/generations', + 'prompt_enhancer' => { + 'examples_array' => [ { - "example" => "class Project:\\n def __init__(self, name, public):{{cursor}}\\n\\n ", - "response" => "return self.visibility == 'PUBLIC'", - "trigger_type" => "comment" + 'example' => 'class Project:\\n def __init__(self, name, public):{{cursor}}\\n\\n ', + 'response' => "return self.visibility == 'PUBLIC'", + 'trigger_type' => 'comment' }, { - "example" => "# get the current user's name from the session data\\n{{cursor}}", - "response" => "username = session['username']\\nreturn username", - "trigger_type" => "comment" + 'example' => "# get the current user's name from the session data\\n{{cursor}}", + 'response' => "username = session['username']\\nreturn username", + 'trigger_type' => 'comment' } ], - "trimmed_prefix" => "some prefix", - "trimmed_suffix" => "some suffix" + 'trimmed_prefix' => 'some prefix', + 'trimmed_suffix' => 'some suffix', + 'related_files' => '', + 'related_snippets' => '', + 'libraries' => '', + 'user_instruction' => 'Generate the best possible code based on instructions.' } } } @@ -94,40 +98,44 @@ let(:endpoint_path) { 'v3/code/completions' } let(:expected_body) do { - "current_file" => { - "content_above_cursor" => "fix", - "content_below_cursor" => "som", - "file_name" => "test.py" + 'current_file' => { + 'content_above_cursor' => 'fix', + 'content_below_cursor' => 'som', + 'file_name' => 'test.py' }, - "prompt_components" => [ + 'prompt_components' => [ { - "payload" => { - "content_above_cursor" => "some prefix", - "content_below_cursor" => "some suffix", - "file_name" => "test.py", - "language_identifier" => "Python", - "prompt_enhancer" => { - "examples_array" => [ + 'payload' => { + 'content_above_cursor' => 'some prefix', + 'content_below_cursor' => 'some suffix', + 'file_name' => 'test.py', + 'language_identifier' => 'Python', + 'prompt_enhancer' => { + 'examples_array' => [ { - "example" => "class Project:\\n def __init__(self, name, public):{{cursor}}\\n\\n ", - "response" => "return self.visibility == 'PUBLIC'", - "trigger_type" => "comment" + 'example' => 'class Project:\\n def __init__(self, name, public):{{cursor}}\\n\\n ', + 'response' => "return self.visibility == 'PUBLIC'", + 'trigger_type' => 'comment' }, { - "example" => "# get the current user's name from the session data\\n{{cursor}}", - "response" => "username = session['username']\\nreturn username", - "trigger_type" => "comment" + 'example' => "# get the current user's name from the session data\\n{{cursor}}", + 'response' => "username = session['username']\\nreturn username", + 'trigger_type' => 'comment' } ], - "trimmed_prefix" => "some prefix", - "trimmed_suffix" => "some suffix" + 'trimmed_prefix' => 'some prefix', + 'trimmed_suffix' => 'some suffix', + 'related_files' => '', + 'related_snippets' => '', + 'libraries' => '', + 'user_instruction' => 'Generate the best possible code based on instructions.' }, - "prompt_id" => "code_suggestions/generations" + 'prompt_id' => 'code_suggestions/generations' }, - "type" => "code_editor_generation" + 'type' => 'code_editor_generation' } ], - "telemetry" => [{ "model_engine" => "anthropic" }] + 'telemetry' => [{ 'model_engine' => 'anthropic' }] } end diff --git a/ee/spec/requests/api/code_suggestions_spec.rb b/ee/spec/requests/api/code_suggestions_spec.rb index d10bd3c9b9c1..bcf377b6b856 100644 --- a/ee/spec/requests/api/code_suggestions_spec.rb +++ b/ee/spec/requests/api/code_suggestions_spec.rb @@ -191,9 +191,13 @@ def is_even(n: int) -> "trigger_type" => "comment" } ], - "trimmed_prefix" => "def is_even(n: int) ->\n# A " \ + 'trimmed_prefix' => "def is_even(n: int) ->\n# A " \ "function that outputs the first 20 fibonacci numbers\n", - "trimmed_suffix" => "" + 'trimmed_suffix' => '', + 'related_files' => [], + 'related_snippets' => [], + 'libraries' => [], + 'user_instruction' => 'Generate the best possible code based on instructions.' } } } -- GitLab