diff --git a/ee/lib/tasks/gitlab/llm/questions.csv b/ee/lib/tasks/gitlab/llm/questions.csv index 64d111ea3cfdb4ec064d712bd6c2559c290d2587..604c5c5cc5800bb8ea406f28a59174955485913b 100644 --- a/ee/lib/tasks/gitlab/llm/questions.csv +++ b/ee/lib/tasks/gitlab/llm/questions.csv @@ -1,24 +1,24 @@ -"Can you explain the code ""def hello_world\\nputs(\""Hello, world!\\n\"");\nend""?",ExplainCode -"Can you explain function ""def hello_world\\nputs(\""Hello, world!\\n\"");\nend""?",ExplainCode -"Write me tests for function ""def hello_world\\nputs(\""Hello, world!\\n\"");\nend""",ExplainCode -"What is the complexity of the code ""def hello_world\\nputs(\""Hello, world!\\n\"");\nend""?",ExplainCode -"How would the ""def hello_world\\nputs(\""Hello, world!\\n\"");\nend"" code look like in Python?",ExplainCode -"How would you refactor the ""def hello_world\\nputs(\""Hello, world!\\n\"");\nend"" code?",ExplainCode -"Can you fix the bug in my ""def hello_world\\nput(\""Hello, world!\\n\"");\nend"" code?",ExplainCode -"Create an example of how to use method ""def hello_world\\nput(\""Hello, world!\\n\"");\nend""",ExplainCode -"Write documentation for the ""def hello_world\\nput(\""Hello, world!\\n\"");\nend"" code",ExplainCode -Create a function to validate an e-mail address,ExplainCode -Create a tic tac toe game in Javascript,ExplainCode -Create a function in Python to call the spotify API to get my playlists,ExplainCode - -Please summarize the %<issue_identifier>s issue,"IssueIdentifier, SummarizeComments" -Summarize the %<issue_identifier>s issue with bullet points,"IssueIdentifier, SummarizeComments" -Can you list all the labels on %<issue_identifier>s issue?,"IssueIdentifier, JsonReader" -How old is the %<issue_identifier>s issue?,"IssueIdentifier, JsonReader" -For which milestone is the %<issue_identifier>s issue? 
And how long until then,"IssueIdentifier, JsonReader" -Summarize the comments into bullet points on %<issue_identifier>s issue,"IssueIdentifier, SummarizeComments" - -How do I change my password in GitLab?,Documentation -How do I fork a project?,Documentation -How do I clone a repository?,Documentation -How do I create a template?,Documentation +"Can you explain the code ""def hello_world\\nputs(\""Hello, world!\\n\"");\nend""?",ExplainCode +"Can you explain function ""def hello_world\\nputs(\""Hello, world!\\n\"");\nend""?",ExplainCode +"Write me tests for function ""def hello_world\\nputs(\""Hello, world!\\n\"");\nend""",ExplainCode +"What is the complexity of the code ""def hello_world\\nputs(\""Hello, world!\\n\"");\nend""?",ExplainCode +"How would the ""def hello_world\\nputs(\""Hello, world!\\n\"");\nend"" code look like in Python?",ExplainCode +"How would you refactor the ""def hello_world\\nputs(\""Hello, world!\\n\"");\nend"" code?",ExplainCode +"Can you fix the bug in my ""def hello_world\\nput(\""Hello, world!\\n\"");\nend"" code?",ExplainCode +"Create an example of how to use method ""def hello_world\\nput(\""Hello, world!\\n\"");\nend""",ExplainCode +"Write documentation for the ""def hello_world\\nput(\""Hello, world!\\n\"");\nend"" code",ExplainCode +Create a function to validate an e-mail address,ExplainCode +Create a tic tac toe game in Javascript,ExplainCode +Create a function in Python to call the spotify API to get my playlists,ExplainCode + +Please summarize the %<issue_identifier>s issue,"IssueIdentifier, SummarizeComments" +Summarize the %<issue_identifier>s issue with bullet points,"IssueIdentifier, SummarizeComments" +Can you list all the labels on %<issue_identifier>s issue?,"IssueIdentifier, Resource Reader" +How old is the %<issue_identifier>s issue?,"IssueIdentifier, Resource Reader" +For which milestone is the %<issue_identifier>s issue? 
# Scores how closely the tools picked by the agent match the expected tools
# for a single question.
#
# @param actions [Array] tool names captured from the agent prompt; nested
#   arrays (e.g. the result of String#scan with one capture group) are
#   flattened before comparison.
# @param answer [String, nil] expected tool names joined by ", " (one CSV
#   cell). nil is treated as "no tools expected".
# @return [Integer, Float] 1 when the picked tools equal the expected list
#   exactly; expected.size / picked.size as a Float strictly between 0 and 1
#   when the picked tools only match after removing repeats; 0 otherwise.
def accuracy_check(actions, answer)
  picked = actions.flatten
  expected = answer.to_s.split(', ')

  return 1 if picked == expected
  return 0 unless picked.uniq == expected

  # Reaching this point means picked contains repeats, so picked.size is
  # strictly greater than expected.size. Integer division would therefore
  # always truncate to 0 and no partial credit would ever be given; fdiv
  # keeps the fractional score.
  expected.size.fdiv(picked.size)
end