From 33d9e49c17a6e2b802d0c1182a9dba75ede81725 Mon Sep 17 00:00:00 2001 From: Leaminn Ma <lma@gitlab.com> Date: Tue, 1 Oct 2024 16:51:19 +0000 Subject: [PATCH] Add support for multiples files for Python Pip config file class Adds support to find and parse multiple files for the PythonPip config class. This is because Pip may have more than one 'requirements' file. This means that the ConfigFileParser may produce an array of objects where more than one object may have the same lang value. In the next MR, we will update the Repository X-Ray ScanDependencies service so that it combines the payloads of config file objects belonging to the same lang. --- .../dependencies/config_file_parser.rb | 10 +- .../context/dependencies/config_files/base.rb | 9 ++ .../dependencies/config_files/constants.rb | 2 +- .../dependencies/config_files/python_pip.rb | 14 ++- .../dependencies/config_file_parser_spec.rb | 115 +++++++++++++++++- .../dependencies/config_files/base_spec.rb | 31 +++++ .../config_files/python_pip_spec.rb | 24 ++-- 7 files changed, 182 insertions(+), 23 deletions(-) diff --git a/ee/lib/ai/context/dependencies/config_file_parser.rb b/ee/lib/ai/context/dependencies/config_file_parser.rb index bbac88db71ca0..a27c7fe5b91e5 100644 --- a/ee/lib/ai/context/dependencies/config_file_parser.rb +++ b/ee/lib/ai/context/dependencies/config_file_parser.rb @@ -55,12 +55,14 @@ def latest_commit_sha def config_file_classes_by_path CONFIG_FILE_CLASSES.group_by(&:lang).each_with_object({}) do |(_lang, klasses), hash| klasses.each do |klass| - matching_path = worktree_paths.find { |path| klass.matches?(path) } + matching_paths = klass.matching_paths(worktree_paths) - next unless matching_path + matching_paths.each do |path| + hash[path] ||= [] + hash[path] << klass + end - hash[matching_path] ||= [] - hash[matching_path] << klass + break if matching_paths.any? end end end diff --git a/ee/lib/ai/context/dependencies/config_files/base.rb b/ee/lib/ai/context/dependencies/config_files/base.rb index 73f09c3fc226e..a7eb11ac654a7 100644 --- a/ee/lib/ai/context/dependencies/config_files/base.rb +++ b/ee/lib/ai/context/dependencies/config_files/base.rb @@ -36,6 +36,15 @@ def self.matches?(path) File.fnmatch?("**/#{file_name_glob}", path, File::FNM_PATHNAME | File::FNM_DOTMATCH) end + # Set to `true` if the dependency manager supports more than one config file + def self.supports_multiple_files? + false + end + + def self.matching_paths(paths) + supports_multiple_files? ? paths.select { |p| matches?(p) } : Array.wrap(paths.find { |p| matches?(p) }) + end + def initialize(blob) @blob = blob @content = blob.data diff --git a/ee/lib/ai/context/dependencies/config_files/constants.rb b/ee/lib/ai/context/dependencies/config_files/constants.rb index 10a7602613a59..506c97cb6e65a 100644 --- a/ee/lib/ai/context/dependencies/config_files/constants.rb +++ b/ee/lib/ai/context/dependencies/config_files/constants.rb @@ -13,7 +13,7 @@ module Constants # #supported-languages-and-package-managers. # # This ordering affects the result of - # ConfigFileParser#find_config_file_paths_with_class. + # ConfigFileParser#config_file_classes_by_path. # CONFIG_FILE_CLASSES = [ ConfigFiles::CConanPy, diff --git a/ee/lib/ai/context/dependencies/config_files/python_pip.rb b/ee/lib/ai/context/dependencies/config_files/python_pip.rb index 0ffec846df13d..5de2d36ae411b 100644 --- a/ee/lib/ai/context/dependencies/config_files/python_pip.rb +++ b/ee/lib/ai/context/dependencies/config_files/python_pip.rb @@ -11,14 +11,20 @@ class PythonPip < Base COMMENT_ONLY_REGEX = /^#/ INLINE_COMMENT_REGEX = /\s+#.*$/ + # We support nested requirements files by processing all files matching + # this glob. See https://gitlab.com/gitlab-org/gitlab/-/issues/491800. def self.file_name_glob - 'requirements.txt' + '*requirements*.txt' end def self.lang_name 'Python' end + def self.supports_multiple_files? + true + end + private ### Example format: @@ -32,10 +38,8 @@ def self.lang_name # openpyxl == 3.1.2 # urllib3 @ https://github.com/path/main.zip # - # # Nested requirement files currently not supported - # # TODO: https://gitlab.com/gitlab-org/gitlab/-/issues/491800 - # -r other_requirements.txt - # # Other options + # # Options + # -r other_requirements.txt # A nested requirements file # -i https://pypi.org/simple # --python-version 3 # diff --git a/ee/spec/lib/ai/context/dependencies/config_file_parser_spec.rb b/ee/spec/lib/ai/context/dependencies/config_file_parser_spec.rb index 5981c1fe8b339..741fa5bba67c7 100644 --- a/ee/spec/lib/ai/context/dependencies/config_file_parser_spec.rb +++ b/ee/spec/lib/ai/context/dependencies/config_file_parser_spec.rb @@ -57,7 +57,9 @@ lang: 'go', valid: true, error_message: nil, - payload: a_hash_including(libs: [{ name: 'abc.org/mylib (1.3.0)' }, { name: 'golang.org/x/mod (0.5.0)' }]) + payload: a_hash_including( + fileName: 'dir1/dir2/go.mod', + libs: [{ name: 'abc.org/mylib (1.3.0)' }, { name: 'golang.org/x/mod (0.5.0)' }]) } ) end @@ -81,17 +83,124 @@ lang: 'c', valid: true, error_message: nil, - payload: a_hash_including(libs: [{ name: 'libiconv (1.17)' }, { name: 'poco (>1.0,<1.9)' }]) + payload: a_hash_including( + fileName: 'dir1/dir2/conanfile.txt', + libs: [{ name: 'libiconv (1.17)' }, { name: 'poco (>1.0,<1.9)' }]) }, { lang: 'cpp', valid: true, error_message: nil, - payload: a_hash_including(libs: [{ name: 'libiconv (1.17)' }, { name: 'poco (>1.0,<1.9)' }]) + payload: a_hash_including( + fileName: 'dir1/dir2/conanfile.txt', + libs: [{ name: 'libiconv (1.17)' }, { name: 'poco (>1.0,<1.9)' }]) } ) end end + + context 'with files matching multiple config file classes for the same language' do + let_it_be(:project) do + create(:project, :custom_repo, files: + { + 'pom.xml' => + <<~CONTENT, + <project> + <dependencies> + <dependency> + <groupId>org.junit.jupiter</groupId> + <artifactId>junit-jupiter-engine</artifactId> + <version>1.2.0</version> + </dependency> + </dependencies> + </project> + CONTENT + 'dir1/dir2/build.gradle' => + <<~CONTENT + dependencies { + implementation 'org.codehaus.groovy:groovy:3.+' + "implementation" 'org.ow2.asm:asm:9.6' + } + CONTENT + }) + end + + it 'returns an object of only the first matching config file class in the order of `CONFIG_FILE_CLASSES`' do + expect(config_files_array).to contain_exactly( + { + lang: 'java', + valid: true, + error_message: nil, + payload: a_hash_including( + fileName: 'dir1/dir2/build.gradle', + libs: [{ name: 'groovy (3.+)' }, { name: 'asm (9.6)' }]) + } + ) + end + end + + context 'with multiple files matching the same config file class' do + context 'when the config file class does not support multiple files' do + let_it_be(:project) do + create(:project, :custom_repo, files: + { + 'go.mod' => + <<~CONTENT, + require abc.org/mylib v1.3.0 + CONTENT + 'dir1/dir2/go.mod' => + <<~CONTENT + require golang.org/x/mod v0.5.0 + CONTENT + }) + end + + it 'returns a config file object for only one of the matching files' do + # We can't be sure which file is found first because it depends on the order of the worktree paths + expect(config_files_array.size).to eq(1) + end + end + + context 'when the config file class supports multiple files' do + let_it_be(:project) do + create(:project, :custom_repo, files: + { + 'requirements.txt' => + <<~CONTENT, + requests>=2.0,<3.0 + numpy==1.26.4 + -r dir1/dir2/dev-requirements.txt + CONTENT + 'dir1/dir2/dev-requirements.txt' => + <<~CONTENT + python_dateutil>=2.5.3 + fastapi-health!=0.3.0 + CONTENT + }) + end + + it 'returns a config file object for each matching file' do + expect(config_files_array).to contain_exactly( + { + lang: 'python', + valid: true, + error_message: nil, + payload: a_hash_including( + fileName: 'requirements.txt', + libs: [{ name: 'requests (>=2.0,<3.0)' }, { name: 'numpy (==1.26.4)' }]) + }, + { + lang: 'python', + valid: true, + error_message: nil, + payload: a_hash_including( + fileName: 'dir1/dir2/dev-requirements.txt', + libs: [{ name: 'python_dateutil (>=2.5.3)' }, { name: 'fastapi-health (!=0.3.0)' }]) + } + ) + end + end + end end end diff --git a/ee/spec/lib/ai/context/dependencies/config_files/base_spec.rb b/ee/spec/lib/ai/context/dependencies/config_files/base_spec.rb index 82ce0580cd2a3..e648292dbac9f 100644 --- a/ee/spec/lib/ai/context/dependencies/config_files/base_spec.rb +++ b/ee/spec/lib/ai/context/dependencies/config_files/base_spec.rb @@ -34,6 +34,7 @@ def extract_libs expect { described_class.file_name_glob }.to raise_error(NotImplementedError) expect { described_class.lang_name }.to raise_error(NotImplementedError) expect { described_class.new(blob).parse! }.to raise_error(NotImplementedError) + expect(described_class.supports_multiple_files?).to eq(false) end it 'returns the expected language value' do @@ -133,4 +134,34 @@ def extract_libs end end end + + describe '.matching_paths' do + let(:paths) { ['other.rb', 'dir/test.json', 'test.txt', 'test.json', 'README.md'] } + + subject(:matching_paths) { config_file_class.matching_paths(paths) } + + it 'returns the first matching path' do + expect(matching_paths).to contain_exactly('dir/test.json') + end + + context 'when multiple files are supported' do + before do + stub_const('ConfigFileClass', + Class.new(described_class) do + def self.file_name_glob + 'test.json' + end + + def self.supports_multiple_files? + true + end + end + ) + end + + it 'returns all matching paths' do + expect(matching_paths).to contain_exactly('dir/test.json', 'test.json') + end + end + end end diff --git a/ee/spec/lib/ai/context/dependencies/config_files/python_pip_spec.rb b/ee/spec/lib/ai/context/dependencies/config_files/python_pip_spec.rb index f0209ab189d4c..5f1165630ecfe 100644 --- a/ee/spec/lib/ai/context/dependencies/config_files/python_pip_spec.rb +++ b/ee/spec/lib/ai/context/dependencies/config_files/python_pip_spec.rb @@ -7,6 +7,10 @@ expect(described_class.lang).to eq('python') end + it 'supports multiple files' do + expect(described_class.supports_multiple_files?).to eq(true) + end + it_behaves_like 'parsing a valid dependency config file' do let(:config_file_content) do <<~CONTENT @@ -21,9 +25,8 @@ urllib3 @ https://github.com/path/main.zip requests [security] >= 2.8.1, == 2.8.* - # Nested requirement files currently not supported + # Options -r other_requirements.txt - # Other options -i https://pypi.org/simple --python-version 3 --no-clean @@ -54,14 +57,15 @@ using RSpec::Parameterized::TableSyntax where(:path, :matches) do - 'requirements.txt' | true - 'dir/requirements.txt' | true - 'dir/subdir/requirements.txt' | true - 'dir/requirements' | false - 'xrequirements.txt' | false - 'Requirements.txt' | false - 'requirements_txt' | false - 'requirements' | false + 'requirements.txt' | true + 'dir/requirements_other.txt' | true + 'dir/subdir/dev-requirements.txt' | true + 'dir/requirements.c' | false + 'test_requirements.txt' | true + 'devrequirements.txt' | true + 'Requirements.txt' | false + 'requirements_txt' | false + 'requirements' | false end with_them do -- GitLab