From e9bf820b5d28d10f4366ceeb6a74586050e3e250 Mon Sep 17 00:00:00 2001 From: Allen Cook <acook@gitlab.com> Date: Tue, 14 Nov 2023 17:41:24 +0000 Subject: [PATCH] Allow skipping generation instruction extraction --- ...code_generation_instruction_extraction.yml | 8 ++ .../instructions_extractor.rb | 23 ++++-- .../prompts/code_generation/anthropic.rb | 35 ++++++-- .../prompts/code_generation/examples.yml | 80 +++++++++++++++++- ee/lib/code_suggestions/task_factory.rb | 8 +- .../instructions_extractor_spec.rb | 82 ++++++++++++++++++- .../prompts/code_generation/anthropic_spec.rb | 81 ++++++++++++++++-- .../lib/code_suggestions/task_factory_spec.rb | 5 +- ee/spec/requests/api/code_suggestions_spec.rb | 1 + 9 files changed, 298 insertions(+), 25 deletions(-) create mode 100644 config/feature_flags/development/skip_code_generation_instruction_extraction.yml diff --git a/config/feature_flags/development/skip_code_generation_instruction_extraction.yml b/config/feature_flags/development/skip_code_generation_instruction_extraction.yml new file mode 100644 index 0000000000000..2418fa7ec399d --- /dev/null +++ b/config/feature_flags/development/skip_code_generation_instruction_extraction.yml @@ -0,0 +1,8 @@ +--- +name: skip_code_generation_instruction_extraction +introduced_by_url: https://gitlab.com/gitlab-org/gitlab/-/merge_requests/136343 +rollout_issue_url: https://gitlab.com/gitlab-org/gitlab/-/issues/431246 +milestone: '16.7' +type: development +group: group::code creation +default_enabled: false diff --git a/ee/lib/code_suggestions/instructions_extractor.rb b/ee/lib/code_suggestions/instructions_extractor.rb index 7a1e8d252d048..18376d9551205 100644 --- a/ee/lib/code_suggestions/instructions_extractor.rb +++ b/ee/lib/code_suggestions/instructions_extractor.rb @@ -17,30 +17,35 @@ class InstructionsExtractor EMPTY_LINES_LIMIT = 1 - def initialize(file_content, intent, skip_generate_comment_prefix) + def initialize( + file_content, intent, + skip_generate_comment_prefix, + skip_instruction_extraction) @file_content = file_content @language = file_content.language @intent = intent @skip_generate_comment_prefix = skip_generate_comment_prefix + @skip_instruction_extraction = skip_instruction_extraction end def extract return {} if intent == INTENT_COMPLETION prefix, comment_block = prefix_and_comment(file_content.lines_above_cursor) - instruction = get_instruction(comment_block) + generation, instruction = get_instruction(comment_block) - return {} if !instruction && intent != INTENT_GENERATION + return {} if !generation && intent != INTENT_GENERATION { - prefix: prefix, + prefix: skip_instruction_extraction ? file_content.content_above_cursor : prefix, instruction: instruction } end private - attr_reader :language, :file_content, :intent, :skip_generate_comment_prefix + attr_reader :language, :file_content, :intent, + :skip_generate_comment_prefix, :skip_instruction_extraction def prefix_and_comment(lines) comment_block = [] @@ -68,24 +73,24 @@ def get_instruction(comment_block) .join("\n") .gsub(/GitLab Duo Generate:\s?/, '') - return instruction if instruction + return true, (skip_instruction_extraction ? '' : instruction) if instruction end if file_content.small? - return <<~PROMPT + return true, <<~PROMPT Create more new code for this file. If the cursor is inside an empty function, generate its most likely contents based on the function name and signature. PROMPT end if language.cursor_inside_empty_function?(file_content.content_above_cursor, file_content.content_below_cursor) - return <<~PROMPT + return true, <<~PROMPT Complete the empty function and generate contents based on the function name and signature. Do not repeat the code. Only return the method contents. PROMPT end - nil + [false, nil] end def first_line_regex diff --git a/ee/lib/code_suggestions/prompts/code_generation/anthropic.rb b/ee/lib/code_suggestions/prompts/code_generation/anthropic.rb index 209c61fe0dde4..d8569d49bc647 100644 --- a/ee/lib/code_suggestions/prompts/code_generation/anthropic.rb +++ b/ee/lib/code_suggestions/prompts/code_generation/anthropic.rb @@ -21,16 +21,18 @@ def request_params def prompt <<~PROMPT Human: You are a coding autocomplete agent. We want to generate new #{language.name} code inside the - file '#{file_path_info}' based on the instructions provided in <instruction> XML tags. + file '#{file_path_info}' based on instructions from the user. #{existing_code_instruction} The new code you will generate will start at the position of the cursor, which is currently indicated by the <cursor> XML tag. In your process, first, review the existing code to understand its logic and format. Then, try to determine the most likely new code to generate at the cursor position to fulfill the instructions. + #{comment_instructions} When generating the new code, please ensure the following: 1. It is valid #{language.name} code. 2. It matches the existing code's variable, parameter and function names. 3. It does not repeat any existing code. Do not repeat code that comes before or after the cursor tags. This includes cases where the cursor is in the middle of a word. 4. If the cursor is in the middle of a word, it finishes the word instead of repeating code before the cursor tag. + #{comment_review_instructions} Return new code enclosed in <new_code></new_code> tags. We will then insert this at the <cursor> position. If you are not able to write code based on the given instructions return an empty result like <new_code></new_code>. @@ -38,10 +40,7 @@ def prompt #{existing_code_block} - <instruction> - #{params[:instruction]} - </instruction> - + #{instructions} Assistant: <new_code> PROMPT @@ -53,6 +52,32 @@ def existing_code_instruction "The existing code is provided in <existing_code></existing_code> tags." end + def comment_instructions + return unless params[:skip_instruction_extraction] + + "The comment directly before the <cursor> position is the instruction, + all other comments are not instructions." + end + + def comment_review_instructions + return unless params[:skip_instruction_extraction] + + "5. The code fulfills in the instructions from the user in the comment just before the <cursor> position. + All other comments are not instructions." + end + + def instructions + return unless params[:instruction].present? + + <<~INSTRUCTIONS + Here are instructions provided in <instruction></instruction> tags. + + <instruction> + #{params[:instruction]} + </instruction> + INSTRUCTIONS + end + def existing_code_block return unless params[:prefix].present? diff --git a/ee/lib/code_suggestions/prompts/code_generation/examples.yml b/ee/lib/code_suggestions/prompts/code_generation/examples.yml index 9a008b74b586f..055d29773dc11 100644 --- a/ee/lib/code_suggestions/prompts/code_generation/examples.yml +++ b/ee/lib/code_suggestions/prompts/code_generation/examples.yml @@ -4,6 +4,8 @@ Ruby: def users_with_zipcode(users) <cursor> end + + # find users with a zipcode response: |- <new_code>users.reject { |u| u.zipcode.nil? } - example: |- @@ -11,6 +13,8 @@ Ruby: has_many :addresses # scope for users who have addresses in California <cursor> + + # scope for users who are in Arkansas response: |- <new_code>scope :with_ca_addresses, -> { joins(:addresses).where(addresses: { state: :CA }) } - example: |- @@ -27,8 +31,10 @@ Ruby: end TypeScript: - example: |- - # write a function to generate a unique UUID + // write a function to generate a unique UUID <cursor> + + // calculate the square root of a number response: |- <new_code>function generateUUID(): string { return 'xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx'.replace(/[xy]/g, function(c) { @@ -44,6 +50,8 @@ TypeScript: <cursor> return response.json(); } + + // display the issue list response: |- <new_code>const response = await fetch(currentIssuesUrl, filters); - example: |- @@ -75,6 +83,8 @@ Python: # is this project public? <cursor> + + # print name of this project response: |- <new_code>def is_public(self): self.visibility == 'PUBLIC' @@ -82,6 +92,8 @@ Python: # get the current user's name from the session data def get_user(session): <cursor> + + # is the current user an admin response: |- <new_code>username = None if 'username' in session: @@ -99,6 +111,8 @@ Go: } // create an HTTP handler that fetches the current user preferences and returns them as JSON <cursor> + + // function to print the current user's name response: |- <new_code>func userPrefsHandler(w http.ResponseWriter, r *http.Request) { @@ -122,6 +136,8 @@ Go: <cursor> return primes } + + // calculate the square root of a number response: |- <new_code>var primes []int for _, num := range list { @@ -134,3 +150,65 @@ Go: } if isPrime { primes = append(primes, num) } } +JavaScript: + - example: |- + // write a function to generate a unique UUID + <cursor> + // this function prints someone's name + response: |- + <new_code>function generateUUID() { + return 'xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx'.replace(/[xy]/g, function(c) { + var r = Math.random() * 16 | 0, v = c == 'x' ? r : (r & 0x3 | 0x8); + return v.toString(16); + }); + } + - example: |- + import { currentIssuesUrl } from './urls'; + + // fetch the current filtered list of issues + async function fetchData(filters) { + <cursor> + return response.json(); + } + + // function to display list of issues + response: |- + <new_code> + // build request URL with filters + let url = currentIssuesUrl; + if (filters) { + url += '?' + Object.entries(filters) + .map(([key, value]) => `${key}=${encodeURIComponent(value)}`) + .join('&'); + } + + // make fetch request + const response = await fetch(url); + - example: |- + // get the hotels near the user, be sure to check permission for location usage + async function getNearbyHotels(userLocation: GeolocationCoordinates): Promise<Hotel[]> { + <cursor> + return nearbyHotels; + } + response: |- + // check if we have permission to access location + if (!navigator.geolocation) { + throw new Error('Geolocation not supported'); + } + + let nearbyHotels = []; + + // make API request to get hotels + const response = await fetch('/api/hotels/nearby', { + method: 'POST', + headers: { + 'Content-Type': 'application/json' + }, + body: JSON.stringify(userLocation) + }); + + if (response.ok) { + nearbyHotels = await response.json(); + } else { + throw new Error('Failed to get nearby hotels'); + } diff --git a/ee/lib/code_suggestions/task_factory.rb b/ee/lib/code_suggestions/task_factory.rb index f8288d95521fe..2a3e79ca9081b 100644 --- a/ee/lib/code_suggestions/task_factory.rb +++ b/ee/lib/code_suggestions/task_factory.rb @@ -25,7 +25,7 @@ def initialize(current_user, params:, unsafe_passthrough_params: {}) def task file_content = CodeSuggestions::FileContent.new(language, prefix, suffix) instructions = CodeSuggestions::InstructionsExtractor - .new(file_content, intent, skip_generate_comment_prefix?).extract + .new(file_content, intent, skip_generate_comment_prefix?, skip_instruction_extraction?).extract if instructions.empty? return CodeSuggestions::Tasks::CodeCompletion.new( @@ -56,6 +56,11 @@ def skip_generate_comment_prefix? end strong_memoize_attr(:skip_generate_comment_prefix?) + def skip_instruction_extraction? + Feature.enabled?(:skip_code_generation_instruction_extraction, current_user) + end + strong_memoize_attr(:skip_instruction_extraction?) + def code_completion_model_family if code_completion_split_by_language? return ANTHROPIC_CODE_COMPLETION_LANGUAGES.include?(language&.name) ? ANTHROPIC : VERTEX_AI @@ -90,6 +95,7 @@ def code_generation_params(instructions) params.merge( prefix: instructions[:prefix], instruction: instructions[:instruction], + skip_instruction_extraction: skip_instruction_extraction?, code_generation_model_family: code_generation_model_family ) end diff --git a/ee/spec/lib/code_suggestions/instructions_extractor_spec.rb b/ee/spec/lib/code_suggestions/instructions_extractor_spec.rb index c800986abe44d..ea04331e27fc2 100644 --- a/ee/spec/lib/code_suggestions/instructions_extractor_spec.rb +++ b/ee/spec/lib/code_suggestions/instructions_extractor_spec.rb @@ -19,8 +19,11 @@ let(:file_content) { CodeSuggestions::FileContent.new(language, content, suffix) } let(:intent) { nil } let(:skip_generate_comment_prefix) { true } + let(:skip_instruction_extraction) { false } - subject { described_class.new(file_content, intent, skip_generate_comment_prefix).extract } + subject do + described_class.new(file_content, intent, skip_generate_comment_prefix, skip_instruction_extraction).extract + end context 'when content is nil' do let(:content) { nil } @@ -90,6 +93,17 @@ is_expected.to be_empty end end + + context 'when skipping instruction extraction' do + let(:skip_instruction_extraction) { true } + + it 'ignores the instruction and sends the code directly' do + is_expected.to eq({ + instruction: '', + prefix: content + }) + end + end end context 'when there is not instruction' do @@ -147,6 +161,17 @@ instruction: default_instruction }) end + + context 'when skipping instruction extraction' do + let(:skip_instruction_extraction) { true } + + it 'sets create instruction' do + is_expected.to eq({ + prefix: content, + instruction: default_instruction + }) + end + end end context 'when the last line is not a comment but code is less than 5 lines' do @@ -165,6 +190,17 @@ def fibonacci(x) instruction: default_instruction }) end + + context 'when skipping instruction extraction' do + let(:skip_instruction_extraction) { true } + + it 'finds the instruction' do + is_expected.to eq({ + prefix: content, + instruction: default_instruction + }) + end + end end context 'when there are some lines above the comment' do @@ -335,6 +371,17 @@ def fibonacci(x) instruction: default_instruction }) end + + context 'when skipping instruction extraction' do + let(:skip_instruction_extraction) { true } + + it "sets the create instruction" do + is_expected.to eq({ + prefix: content, + instruction: default_instruction + }) + end + end end context 'when there is content between comment lines' do @@ -477,6 +524,19 @@ def index(arg1, arg2): end end + context 'when skipping instruction extraction' do + let(:skip_instruction_extraction) { true } + + let(:suffix) { '' } + + specify do + is_expected.to eq( + prefix: content, + instruction: instruction + ) + end + end + context 'when cursor is inside an empty method but middle of the file' do let(:suffix) do <<~SUFFIX @@ -494,6 +554,26 @@ def index3(arg1): instruction: instruction ) end + + context 'when skipping instruction extraction' do + let(:skip_instruction_extraction) { true } + let(:suffix) do + <<~SUFFIX + def index2(): + return 0 + + def index3(arg1): + return 1 + SUFFIX + end + + specify do + is_expected.to eq( + prefix: content, + instruction: instruction + ) + end + end end context 'when cursor in inside a non-empty method' do diff --git a/ee/spec/lib/code_suggestions/prompts/code_generation/anthropic_spec.rb b/ee/spec/lib/code_suggestions/prompts/code_generation/anthropic_spec.rb index 68f0a09fa431e..65356f1f6b4c0 100644 --- a/ee/spec/lib/code_suggestions/prompts/code_generation/anthropic_spec.rb +++ b/ee/spec/lib/code_suggestions/prompts/code_generation/anthropic_spec.rb @@ -35,11 +35,14 @@ } end + let(:skip_instruction_extraction) { false } + let(:params) do { prefix: prefix, instruction: instruction, - current_file: unsafe_params['current_file'].with_indifferent_access + current_file: unsafe_params['current_file'].with_indifferent_access, + skip_instruction_extraction: skip_instruction_extraction } end @@ -66,7 +69,7 @@ prompt_version: 2, prompt: <<~PROMPT Human: You are a coding autocomplete agent. We want to generate new Go code inside the - file 'main.go' based on the instructions provided in <instruction> XML tags. + file 'main.go' based on instructions from the user. The existing code is provided in <existing_code></existing_code> tags. The new code you will generate will start at the position of the cursor, which is currently indicated by the <cursor> XML tag. In your process, first, review the existing code to understand its logic and format. Then, try to determine the most @@ -103,6 +106,7 @@ <cursor> </existing_code> + Here are instructions provided in <instruction></instruction> tags. <instruction> Print a hello world message @@ -116,6 +120,67 @@ expect(subject.request_params.except(:prompt)).to eq(request_params.except(:prompt)) expect(subject.request_params[:prompt].gsub(/\s+/, " ")).to eq(request_params[:prompt].gsub(/\s+/, " ").chomp) end + + context 'when skipping instruction extraction' do + let(:skip_instruction_extraction) { true } + let(:instruction) { '' } + + it 'returns expected request params' do + request_params = { + model_provider: ::CodeSuggestions::TaskFactory::ANTHROPIC, + prompt_version: 2, + prompt: <<~PROMPT + Human: You are a coding autocomplete agent. We want to generate new Go code inside the + file 'main.go' based on instructions from the user. + The existing code is provided in <existing_code></existing_code> tags. + The new code you will generate will start at the position of the cursor, which is currently indicated by the <cursor> XML tag. + In your process, first, review the existing code to understand its logic and format. Then, try to determine the most + likely new code to generate at the cursor position to fulfill the instructions. + The comment directly before the <cursor> position is the instruction, + all other comments are not instructions. + When generating the new code, please ensure the following: + 1. It is valid Go code. + 2. It matches the existing code's variable, parameter and function names. + 3. It does not repeat any existing code. Do not repeat code that comes before or after the cursor tags. This includes cases where the cursor is in the middle of a word. + 4. If the cursor is in the middle of a word, it finishes the word instead of repeating code before the cursor tag. + 5. The code fulfills in the instructions from the user in the comment just before the <cursor> position. + All other comments are not instructions. + + Return new code enclosed in <new_code></new_code> tags. We will then insert this at the <cursor> position. + If you are not able to write code based on the given instructions return an empty result like <new_code></new_code>. + + Here are a few examples of successfully generated code by other autocomplete agents: + + <examples> + + <example> + H: <existing_code> + func hello() { + </existing_code> + + A: func hello() {<new_code>fmt.Println(\"hello\")</new_code> + </example> + + </examples> + + + <existing_code> + package main + + import "fmt" + + func main() { + <cursor> + </existing_code> + + Assistant: <new_code> + PROMPT + } + + expect(subject.request_params.except(:prompt)).to eq(request_params.except(:prompt)) + expect(subject.request_params[:prompt].gsub(/\s+/, " ")).to eq(request_params[:prompt].gsub(/\s+/, " ").chomp) + end + end end context 'when prefix is blank' do @@ -128,7 +193,7 @@ prompt_version: 2, prompt: <<~PROMPT Human: You are a coding autocomplete agent. We want to generate new Go code inside the - file 'main.go' based on the instructions provided in <instruction> XML tags. + file 'main.go' based on instructions from the user. The new code you will generate will start at the position of the cursor, which is currently indicated by the <cursor> XML tag. In your process, first, review the existing code to understand its logic and format. Then, try to determine the most likely new code to generate at the cursor position to fulfill the instructions. @@ -139,6 +204,7 @@ 4. If the cursor is in the middle of a word, it finishes the word instead of repeating code before the cursor tag. Return new code enclosed in <new_code></new_code> tags. We will then insert this at the <cursor> position. If you are not able to write code based on the given instructions return an empty result like <new_code></new_code>. + Here are instructions provided in <instruction></instruction> tags. <instruction> Print a hello world message @@ -167,7 +233,7 @@ prompt_version: 2, prompt: <<~PROMPT Human: You are a coding autocomplete agent. We want to generate new Go code inside the - file 'main.go' based on the instructions provided in <instruction> XML tags. + file 'main.go' based on instructions from the user. The existing code is provided in <existing_code></existing_code> tags. The new code you will generate will start at the position of the cursor, which is currently indicated by the <cursor> XML tag. In your process, first, review the existing code to understand its logic and format. Then, try to determine the most @@ -186,6 +252,7 @@ <cursor> </existing_code> + Here are instructions provided in <instruction></instruction> tags. <instruction> Print a hello world message @@ -211,7 +278,7 @@ prompt_version: 2, prompt: <<~PROMPT Human: You are a coding autocomplete agent. We want to generate new code inside the - file 'file_without_extension' based on the instructions provided in <instruction> XML tags. + file 'file_without_extension' based on instructions from the user. The existing code is provided in <existing_code></existing_code> tags. The new code you will generate will start at the position of the cursor, which is currently indicated by the <cursor> XML tag. In your process, first, review the existing code to understand its logic and format. Then, try to determine the most @@ -233,6 +300,7 @@ <cursor> </existing_code> + Here are instructions provided in <instruction></instruction> tags. <instruction> Print a hello world message @@ -259,7 +327,7 @@ prompt_version: 2, prompt: <<~PROMPT Human: You are a coding autocomplete agent. We want to generate new code inside the - file 'README.md' based on the instructions provided in <instruction> XML tags. + file 'README.md' based on instructions from the user. The existing code is provided in <existing_code></existing_code> tags. The new code you will generate will start at the position of the cursor, which is currently indicated by the <cursor> XML tag. In your process, first, review the existing code to understand its logic and format. Then, try to determine the most @@ -281,6 +349,7 @@ <cursor> </existing_code> + Here are instructions provided in <instruction></instruction> tags. <instruction> Print a hello world message diff --git a/ee/spec/lib/code_suggestions/task_factory_spec.rb b/ee/spec/lib/code_suggestions/task_factory_spec.rb index 318ba75e3412f..79d9e19114c0c 100644 --- a/ee/spec/lib/code_suggestions/task_factory_spec.rb +++ b/ee/spec/lib/code_suggestions/task_factory_spec.rb @@ -29,7 +29,7 @@ it 'calls instructions extractor with expected params' do expect(CodeSuggestions::InstructionsExtractor) .to receive(:new) - .with(an_instance_of(CodeSuggestions::FileContent), nil, true) + .with(an_instance_of(CodeSuggestions::FileContent), nil, true, true) .and_call_original subject @@ -96,7 +96,8 @@ params: params.merge( code_generation_model_family: expected_family, instruction: 'instruction', - prefix: 'trimmed prefix' + prefix: 'trimmed prefix', + skip_instruction_extraction: true ), unsafe_passthrough_params: {} } diff --git a/ee/spec/requests/api/code_suggestions_spec.rb b/ee/spec/requests/api/code_suggestions_spec.rb index ab91d06da8c2a..cb71e2477f66e 100644 --- a/ee/spec/requests/api/code_suggestions_spec.rb +++ b/ee/spec/requests/api/code_suggestions_spec.rb @@ -23,6 +23,7 @@ before do stub_feature_flags(code_completion_anthropic: false) stub_feature_flags(code_completion_split_by_language: false) + stub_feature_flags(skip_code_generation_instruction_extraction: false) allow(Gitlab).to receive(:org_or_com?).and_return(is_saas) allow(Ability).to receive(:allowed?).and_call_original -- GitLab