diff --git a/ee/lib/gitlab/llm/anthropic/completions/review_merge_request.rb b/ee/lib/gitlab/llm/anthropic/completions/review_merge_request.rb index dc649ac2599c486b86eceb6b849023d037cf67ed..e8b259fb950a0cc1ee8971ec41e0020c1232248e 100644 --- a/ee/lib/gitlab/llm/anthropic/completions/review_merge_request.rb +++ b/ee/lib/gitlab/llm/anthropic/completions/review_merge_request.rb @@ -89,58 +89,26 @@ def note_not_required?(response_modifier) def build_draft_notes(response_modifier, diff_file, diff_refs) return if note_not_required?(response_modifier) - response = Nokogiri::XML.parse(response_modifier.response_body) - - expected_attrs = %w[priority old_line new_line] - response.css('review comment').each do |comment| - next unless (expected_attrs - comment.keys).blank? - - old_line = comment['old_line'].blank? ? nil : comment['old_line'].to_i - new_line = comment['new_line'].blank? ? nil : comment['new_line'].to_i + parsed_body = ResponseBodyParser.new(response_modifier.response_body) + parsed_body.comments.each do |comment| # NOTE: LLM may return invalid line numbers sometimes so we should double check the existence of the line. - line = diff_file.diff_lines.find { |line| line.old_line == old_line && line.new_line == new_line } + line = diff_file.diff_lines.find do |line| + line.old_line == comment.old_line && line.new_line == comment.new_line + end + next unless line.present? - note_content = parsed_note_content(comment) - draft_note_params = build_draft_note_params(note_content, diff_file, line, diff_refs) + draft_note_params = build_draft_note_params(comment.content, diff_file, line, diff_refs) next unless draft_note_params.present? @draft_notes_by_priority << [ - comment['priority'].to_i, + comment.priority, draft_note_params ] end end - def parsed_note_content(comment) - # NOTE: We should avoid parsing the content if it is not in an expected suggestion format. - return comment.inner_html.delete_prefix("\n") unless comment.element_children.map(&:name) == %w[from to] - - line_offset_below = 0 - - comment.children.map do |element| - inner_html = element.inner_html - - case element.name - when 'from' - # NOTE: We're just interested in counting the existing lines as LLM doesn't - # seem to be able to reliably set this by itself. - # We need to remove the starting line breaks to count content lines correctly - # as the element.inner_html includes the initial line break as in `<from>\n`. - line_offset_below = inner_html.delete_prefix("\n").lines.count - 1 - '' - when 'to' - # NOTE: Sometimes LLM returns inline tags like `<to> some text</to>` for single line suggestions - # so we need to handle that here to make sure the suggestion format is correct. - content = inner_html.start_with?("\n") ? inner_html : "\n#{inner_html}\n" - "```suggestion:-0+#{line_offset_below}#{content}```\n" - when 'text' - element.inner_text.delete_prefix("\n") - end - end.join - end - def build_draft_note_params(comment, diff_file, line, diff_refs) position = { base_sha: diff_refs.base_sha, diff --git a/ee/lib/gitlab/llm/anthropic/completions/review_merge_request/response_body_parser.rb b/ee/lib/gitlab/llm/anthropic/completions/review_merge_request/response_body_parser.rb new file mode 100644 index 0000000000000000000000000000000000000000..f4951c7831fd0943acf503f4a4a47a7f4b67bab4 --- /dev/null +++ b/ee/lib/gitlab/llm/anthropic/completions/review_merge_request/response_body_parser.rb @@ -0,0 +1,107 @@ +# frozen_string_literal: true + +module Gitlab + module Llm + module Anthropic + module Completions + class ReviewMergeRequest + class ResponseBodyParser + include ::Gitlab::Utils::StrongMemoize + + class Comment + ATTRIBUTES = %w[priority old_line new_line].freeze + + attr_reader :attributes, :content + + def initialize(attrs, content) + @attributes = attrs + @content = content + end + + ATTRIBUTES.each do |attr| + define_method(attr) do + Integer(attributes[attr], exception: false) + end + end + + def valid? + return false if priority.blank? || content.blank? + return false if old_line.blank? && new_line.blank? + + true + end + end + + attr_reader :response + + def initialize(response) + @response = response + end + + def comments + return [] if response.blank? + + review_content = response.match(review_wrapper_regex) + + return [] if review_content.blank? + + review_content[1].scan(comment_wrapper_regex).filter_map do |attrs, body| + comment = Comment.new(parsed_attrs(attrs), parsed_content(body)) + comment if comment.valid? + end + end + strong_memoize_attr :comments + + private + + def review_wrapper_regex + %r{^<review>(.+)</review>$}m + end + + def comment_wrapper_regex + %r{^<comment (.+?)>(?:\n?)(.+?)</comment>$}m + end + + def comment_attr_regex + %r{([^\s]*?)="(.*?)"} + end + + def code_suggestion_regex + # NOTE: We might get multiple code suggestions on the same line as that's still valid so we should take + # that possibility of extra `<from>` into account here. + # Also, sometimes LLM returns tags inline like `<to> some text</to>` for single line suggestions which + # we need to handle as well just in case. + %r{(.*?)^(?:<from>\n(.*?)^</from>\n<to>(.*?)^</to>|<from>(.+?)</from>\n<to>(.+?)</to>)(.*?)(?=<from>|\z)}m + end + + def parsed_attrs(attrs) + Hash[attrs.scan(comment_attr_regex)] + end + + def parsed_content(body) + body_with_suggestions = body + .scan(code_suggestion_regex) + .map do |header, multiline_from, multiline_to, inline_from, inline_to, footer| + # NOTE: We're just interested in counting the existing lines as LLM doesn't + # seem to be able to reliably set this by itself. + # Also, since we have two optional matching pairs so either `multiline_from` and `multiline_to` or + # `inline_from` and `inline_to` would exist. + line_offset_below = (multiline_from || inline_from).lines.count - 1 + + # NOTE: Inline code suggestion needs to be wrapped in new lines to format it correctly. + comment = inline_to.nil? ? multiline_to : "\n#{inline_to}\n" + + "#{header}```suggestion:-0+#{line_offset_below}#{comment}```#{footer}" + end + + # NOTE: Return original body if the body doesn't have any expected suggestion format. + return body unless body_with_suggestions.present? + + body_with_suggestions.join + end + end + end + end + end + end +end diff --git a/ee/spec/lib/gitlab/llm/anthropic/completions/review_merge_request/response_body_parser_spec.rb b/ee/spec/lib/gitlab/llm/anthropic/completions/review_merge_request/response_body_parser_spec.rb new file mode 100644 index 0000000000000000000000000000000000000000..3515fa537baf2e211f9551b248d59ff5575068b8 --- /dev/null +++ b/ee/spec/lib/gitlab/llm/anthropic/completions/review_merge_request/response_body_parser_spec.rb @@ -0,0 +1,580 @@ +# frozen_string_literal: true + +require 'spec_helper' + +RSpec.describe Gitlab::Llm::Anthropic::Completions::ReviewMergeRequest::ResponseBodyParser, feature_category: :code_review_workflow do + subject(:parser) { described_class.new(body) } + + let(:body) { nil } + + describe '#parse' do + context 'with valid content' do + context 'with text only comment' do + let(:body) do + <<~RESPONSE + <review> + <comment priority="3" old_line="1" new_line="2"> + First line of comment + Second line of comment + + Third line of comment + </comment> + </review> + RESPONSE + end + + it 'returns the expected comment' do + comment = parser.comments.sole + + expect(comment.priority).to eq 3 + expect(comment.old_line).to eq 1 + expect(comment.new_line).to eq 2 + expect(comment.content).to eq <<~NOTE_CONTENT + First line of comment + Second line of comment + + Third line of comment + NOTE_CONTENT + end + end + + context 'when old_line attribute is empty' do + let(:body) do + <<~RESPONSE + <review> + <comment priority="3" old_line="" new_line="2"> + Example comment + </comment> + </review> + RESPONSE + end + + it 'returns the expected comment' do + comment = parser.comments.sole + + expect(comment.priority).to be 3 + expect(comment.old_line).to be_nil + expect(comment.new_line).to be 2 + expect(comment.content).to eq <<~NOTE_CONTENT + Example comment + NOTE_CONTENT + end + end + + context 'when new_line attribute is empty' do + let(:body) do + <<~RESPONSE + <review> + <comment priority="3" old_line="1" new_line=""> + Example comment + </comment> + </review> + RESPONSE + end + + it 'returns the expected comment' do + comment = parser.comments.sole + + expect(comment.priority).to be 3 + expect(comment.old_line).to be 1 + expect(comment.new_line).to be_nil + expect(comment.content).to eq <<~NOTE_CONTENT + Example comment + NOTE_CONTENT + end + end + + context 'with code suggestion' do + let(:body) do + <<~RESPONSE + <review> + <comment priority="3" old_line="1" new_line="2"> + First comment with suggestions + <from> + first offending line + </from> + <to> + first improved line + </to> + Some more comments + </comment> + </review> + RESPONSE + end + + it 'returns the expected comment' do + comment = parser.comments.sole + + expect(comment.content).to eq <<~NOTE_CONTENT + First comment with suggestions + ```suggestion:-0+0 + first improved line + ``` + Some more comments + NOTE_CONTENT + end + + context 'when <from> and <to> tags are inlined' do + let(:body) do + <<~RESPONSE + <review> + <comment priority="3" old_line="1" new_line="2"> + First comment with suggestions + <from> first offending line</from> + <to> first improved line</to> + Some more comments + </comment> + </review> + RESPONSE + end + + it 'returns the expected comment' do + comment = parser.comments.sole + + expect(comment.content).to eq <<~NOTE_CONTENT + First comment with suggestions + ```suggestion:-0+0 + first improved line + ``` + Some more comments + NOTE_CONTENT + end + end + + context 'when the response contains a multiline suggestion' do + let(:body) do + <<~RESPONSE + <review> + <comment priority="3" old_line="1" new_line="2"> + First comment with a suggestion + <from> + first offending line + second offending line + third offending line + </from> + <to> + first improved line + second improved line + third improved line + fourth improved line + </to> + Some more comments + </comment> + </review> + RESPONSE + end + + it 'returns the expected comment' do + comment = parser.comments.sole + + expect(comment.content).to eq <<~NOTE_CONTENT + First comment with a suggestion + ```suggestion:-0+2 + first improved line + second improved line + third improved line + fourth improved line + ``` + Some more comments + NOTE_CONTENT + end + end + + context 'when the response contains multiple comments' do + let(:body) do + <<~RESPONSE + <review> + <comment priority="3" old_line="1" new_line="2"> + First comment with a suggestion + <from> + first offending line + </from> + <to> + first improved line + </to> + </comment> + + <comment priority="2" old_line="" new_line="5"> + Second comment with a suggestion + <from> + first offending line + second offending line + </from> + <to> + second improved line + third improved line + </to> + + Some more comments + </comment> + </review> + RESPONSE + end + + it 'parses both comments correctly' do + expect(parser.comments.count).to eq 2 + + first_comment = parser.comments[0] + expect(first_comment.priority).to eq 3 + expect(first_comment.old_line).to eq 1 + expect(first_comment.new_line).to eq 2 + expect(first_comment.content).to eq <<~NOTE_CONTENT + First comment with a suggestion + ```suggestion:-0+0 + first improved line + ``` + NOTE_CONTENT + + second_comment = parser.comments[1] + expect(second_comment.priority).to be 2 + expect(second_comment.old_line).to be_nil + expect(second_comment.new_line).to be 5 + expect(second_comment.content).to eq <<~NOTE_CONTENT + Second comment with a suggestion + ```suggestion:-0+1 + second improved line + third improved line + ``` + + Some more comments + NOTE_CONTENT + end + end + + context 'when the response contains multiple suggestions in one comment' do + let(:body) do + <<~RESPONSE + <review> + <comment priority="3" old_line="1" new_line="2"> + First comment with a suggestion + <from> + first offending line + </from> + <to> + first improved line + </to> + + Alternative suggestion + <from> + first offending line + second offending line + </from> + <to> + second improved line + third improved line + </to> + + Some more comments + </comment> + </review> + RESPONSE + end + + it 'parses both suggestions correctly' do + comment = parser.comments.sole + + expect(comment.content).to eq <<~NOTE_CONTENT + First comment with a suggestion + ```suggestion:-0+0 + first improved line + ``` + + Alternative suggestion + ```suggestion:-0+1 + second improved line + third improved line + ``` + + Some more comments + NOTE_CONTENT + end + end + + context 'when the content includes other elements' do + let(:body) do + <<~RESPONSE + <review> + <comment priority="3" old_line="" new_line="2"> + <from> + <div>first offending line</div> + <p>second offending line</p> + </from> + <to> + <div>first improved line</div> + <p>second improved line</p> + </to> + </comment> + </review> + RESPONSE + end + + it 'returns the expected comment' do + comment = parser.comments.sole + + expect(comment.content).to eq <<~NOTE_CONTENT + ```suggestion:-0+1 + <div>first improved line</div> + <p>second improved line</p> + ``` + NOTE_CONTENT + end + end + + context 'when the content includes <from> and <to>' do + let(:body) do + <<~RESPONSE + <review> + <comment priority="3" old_line="" new_line="2"> + <from> + <from>first offending line</from> + <to>second offending line</to> + </from> + <to> + <from>first improved line</from> + <to>second improved line</to> + </to> + </comment> + </review> + RESPONSE + end + + it 'returns the expected comment' do + comment = parser.comments.sole + + expect(comment.content).to eq <<~NOTE_CONTENT + ```suggestion:-0+1 + <from>first improved line</from> + <to>second improved line</to> + ``` + NOTE_CONTENT + end + end + + context 'when the content includes <from>' do + let(:body) do + <<~RESPONSE + <review> + <comment priority="3" old_line="1" new_line="2"> + Some comment including a <from> tag + <from> + <from> + Old + </from> + </from> + <to> + <from> + New + </from> + </to> + </comment> + </review> + RESPONSE + end + + it 'returns the expected comment' do + comment = parser.comments.sole + + expect(comment.content).to eq <<~NOTE_CONTENT + Some comment including a <from> tag + ```suggestion:-0+2 + <from> + New + </from> + ``` + NOTE_CONTENT + end + end + + context 'when the suggestion contains any reserved XML characters' do + let(:body) do + <<~RESPONSE + <review> + <comment priority="3" old_line="1" new_line="2"> + First comment with suggestions + <from> + a && b + </from> + <to> + a && b < c + </to> + </comment> + </review> + RESPONSE + end + + it 'returns the expected comment' do + comment = parser.comments.sole + + expect(comment.content).to eq <<~NOTE_CONTENT + First comment with suggestions + ```suggestion:-0+0 + a && b < c + ``` + NOTE_CONTENT + end + end + + context 'when the code suggestion contains line breaks only' do + let(:body) do + <<~RESPONSE + <review> + <comment priority="3" old_line="1" new_line="2"> + Please remove extra lines + <from> + + + + </from> + <to> + + </to> + </comment> + </review> + RESPONSE + end + + it 'returns the expected comment' do + comment = parser.comments.sole + + expect(comment.content).to eq <<~NOTE_CONTENT + Please remove extra lines + ```suggestion:-0+2 + + ``` + NOTE_CONTENT + end + end + + context 'when the comment include only <to> tag' do + let(:body) do + <<~RESPONSE + <review> + <comment priority="3" old_line="" new_line="2"> + First comment with suggestions + <to> + something random + </to> + Some more comments + </comment> + </review> + RESPONSE + end + + it 'returns the expected comment' do + comment = parser.comments.sole + + expect(comment.content).to eq <<~NOTE_CONTENT + First comment with suggestions + <to> + something random + </to> + Some more comments + NOTE_CONTENT + end + end + + context 'when the response includes contents outside of <review> tag' do + let(:body) do + <<~RESPONSE + Let me explain how awesome this review is. + + <review> + <comment priority="3" old_line="" new_line="2"> + Example comment + </comment> + </review> + RESPONSE + end + + it 'returns the expected comment' do + comment = parser.comments.sole + + expect(comment.content).to eq <<~NOTE_CONTENT + Example comment + NOTE_CONTENT + end + end + end + end + + context 'when the review content is empty' do + let(:body) { "<review></review>" } + + it 'returns an empty array' do + expect(parser.comments).to be_blank + end + end + + context 'when the body is nil' do + let(:body) { nil } + + it 'returns an empty array' do + expect(parser.comments).to be_blank + end + end + + context 'when the body is empty string' do + let(:body) { '' } + + it 'returns an empty array' do + expect(parser.comments).to be_blank + end + end + + context 'when <comment> tag is missing' do + let(:body) { "<review>some random text</review>" } + + it 'returns an empty array' do + expect(parser.comments).to be_blank + end + end + + context 'when attributes are missing' do + let(:body) do + <<~RESPONSE + <review> + <comment> + Example comment + </comment> + </review> + RESPONSE + end + + it 'returns expected output' do + expect(parser.comments).to be_blank + end + end + + context 'when priority attribute is missing' do + let(:body) do + <<~RESPONSE + <review> + <comment old_line="1" new_line="2"> + Example comment + </comment> + </review> + RESPONSE + end + + it 'returns expected output' do + expect(parser.comments).to be_empty + end + end + + context 'when line attributes are missing' do + let(:body) do + <<~RESPONSE + <review> + <comment old_line="1"> + Example comment + </comment> + </review> + RESPONSE + end + + it 'returns expected output' do + expect(parser.comments).to be_empty + end + end + end +end diff --git a/ee/spec/lib/gitlab/llm/anthropic/completions/review_merge_request_spec.rb b/ee/spec/lib/gitlab/llm/anthropic/completions/review_merge_request_spec.rb index a7729c9fbceb2bdb2a0d054884daf25f77b5c44e..89c3230e970835632559d073bd6200f1fe645692 100644 --- a/ee/spec/lib/gitlab/llm/anthropic/completions/review_merge_request_spec.rb +++ b/ee/spec/lib/gitlab/llm/anthropic/completions/review_merge_request_spec.rb @@ -252,152 +252,6 @@ end end - context 'when the <from> and <to> content is at the same line as the tags' do - let(:first_review_answer) do - <<~RESPONSE - <review> - <comment priority="3" old_line="" new_line="2"> - First comment with suggestions - <from> first offending line</from> - <to> first improved line</to> - Some more comments - </comment> - </review> - RESPONSE - end - - let(:second_review_response) { {} } - - it 'returns the correctly formatted suggestion block' do - completion.execute - - diff_note = merge_request.notes.diff_notes.authored_by(duo_code_review_bot).last - expect(diff_note.note).to eq <<~NOTE_CONTENT - First comment with suggestions - ```suggestion:-0+0 - first improved line - ``` - Some more comments - NOTE_CONTENT - end - end - - context 'when the content includes other elements' do - let(:first_review_answer) do - <<~RESPONSE - <review> - <comment priority="3" old_line="" new_line="2"> - First comment with suggestions - <from> - <div>first offending line</div> - <p>second offending line</p> - </from> - <to> - <div>first improved line</div> - <p>second improved line</p> - </to> - Some more comments - </comment> - </review> - RESPONSE - end - - let(:second_review_response) { {} } - - it 'returns the correctly formatted suggestion block' do - completion.execute - - diff_note = merge_request.notes.diff_notes.authored_by(duo_code_review_bot).last - expect(diff_note.note).to eq <<~NOTE_CONTENT - First comment with suggestions - ```suggestion:-0+1 - <div>first improved line</div> - <p>second improved line</p> - ``` - Some more comments - NOTE_CONTENT - end - - context 'when the content contains additional <from> and <to>' do - # This format of response is intentional. - # It is to represent actual code changes in an MR wherein the diff - # actually includes `<from>` and `<to>` elements. - let(:first_review_answer) do - <<~RESPONSE - <review> - <comment priority="3" old_line="" new_line="2"> - First comment with suggestions - <from> - something fishy - </from> - <to> - something fishy - </to> - <from> - </from> - <to> - something better - </to> - </comment> - </review> - RESPONSE - end - - it 'returns the content as is' do - completion.execute - - diff_note = merge_request.notes.diff_notes.authored_by(duo_code_review_bot).last - expect(diff_note.note).to eq <<~NOTE_CONTENT - First comment with suggestions - <from> - something fishy - </from> - <to> - something fishy - </to> - <from> - </from> - <to> - something better - </to> - NOTE_CONTENT - end - end - end - - context 'when LLM returns incorrectly formatted XML' do - let(:first_review_answer) do - <<~RESPONSE - <review> - <comment priority="3" old_line="" new_line="2"> - First comment with suggestions - <to> - first improved line - second improved line - </to> - Some more comments - </comment> - </review> - RESPONSE - end - - let(:second_review_response) { {} } - - it 'returns the content as is' do - completion.execute - - diff_note = merge_request.notes.diff_notes.authored_by(duo_code_review_bot).last - expect(diff_note.note).to eq <<~NOTE_CONTENT - First comment with suggestions - <to> - first improved line - second improved line - </to> - Some more comments - NOTE_CONTENT - end - end - context 'when the chat client response includes invalid comments' do let(:first_review_response) { { content: [{ text: first_review_answer }] } } let(:first_review_answer) do @@ -413,8 +267,9 @@ let(:second_review_answer) do <<~RESPONSE <review> - <comment priority="" old_line="" new_line="1">Third comment with suggestions</comment> - <comment priority="3" old_line="" new_line="">Fourth comment with suggestions</comment> + <comment priority="" old_line="" new_line="1">Third comment with no priority</comment> + <comment priority="3" old_line="" new_line="">Fourth comment with missing lines</comment> + <comment priority="3" old_line="" new_line="10">Fifth comment with invalid line</comment> </review> RESPONSE end @@ -422,12 +277,10 @@ it 'creates a valid comment only' do completion.execute - diff_notes = merge_request.notes.diff_notes.authored_by(duo_code_review_bot).reorder(:id) - expect(diff_notes.count).to eq 1 + diff_note = merge_request.notes.diff_notes.authored_by(duo_code_review_bot).sole - first_note = diff_notes.first - expect(first_note.note).to eq 'Second comment with suggestions' - expect(first_note.position.new_line).to eq(2) + expect(diff_note.note).to eq 'Second comment with suggestions' + expect(diff_note.position.new_line).to eq(2) end end @@ -452,15 +305,18 @@ RESPONSE end - it 'creates a valid comment only' do + it 'creates valid <review> section only' do completion.execute diff_notes = merge_request.notes.diff_notes.authored_by(duo_code_review_bot).reorder(:id) - expect(diff_notes.count).to eq 1 + expect(diff_notes.count).to eq 2 - first_note = diff_notes.first + first_note = diff_notes[0] expect(first_note.note).to eq 'Second comment with suggestions' expect(first_note.position.new_line).to eq(1) + second_note = diff_notes[1] + expect(second_note.note).to eq 'First comment with suggestions' + expect(second_note.position.new_line).to eq(2) end end