From 33d9e49c17a6e2b802d0c1182a9dba75ede81725 Mon Sep 17 00:00:00 2001
From: Leaminn Ma <lma@gitlab.com>
Date: Tue, 1 Oct 2024 16:51:19 +0000
Subject: [PATCH] Add support for multiples files for Python Pip config file
 class

Adds support to find and parse multiple files for the
PythonPip config class. This is because Pip may have
more than one 'requirements' file.

This means that the ConfigFileParser may produce an
array of objects where more than one object may have
the same lang value.

In the next MR, we will update the Repository X-Ray
ScanDependencies service so that it combines the
payloads of config file objects belonging to the
same lang.
---
 .../dependencies/config_file_parser.rb        |  10 +-
 .../context/dependencies/config_files/base.rb |   9 ++
 .../dependencies/config_files/constants.rb    |   2 +-
 .../dependencies/config_files/python_pip.rb   |  14 ++-
 .../dependencies/config_file_parser_spec.rb   | 115 +++++++++++++++++-
 .../dependencies/config_files/base_spec.rb    |  31 +++++
 .../config_files/python_pip_spec.rb           |  24 ++--
 7 files changed, 182 insertions(+), 23 deletions(-)

diff --git a/ee/lib/ai/context/dependencies/config_file_parser.rb b/ee/lib/ai/context/dependencies/config_file_parser.rb
index bbac88db71ca0..a27c7fe5b91e5 100644
--- a/ee/lib/ai/context/dependencies/config_file_parser.rb
+++ b/ee/lib/ai/context/dependencies/config_file_parser.rb
@@ -55,12 +55,14 @@ def latest_commit_sha
         def config_file_classes_by_path
           CONFIG_FILE_CLASSES.group_by(&:lang).each_with_object({}) do |(_lang, klasses), hash|
             klasses.each do |klass|
-              matching_path = worktree_paths.find { |path| klass.matches?(path) }
+              matching_paths = klass.matching_paths(worktree_paths)
 
-              next unless matching_path
+              matching_paths.each do |path|
+                hash[path] ||= []
+                hash[path] << klass
+              end
 
-              hash[matching_path] ||= []
-              hash[matching_path] << klass
+              break if matching_paths.any?
             end
           end
         end
diff --git a/ee/lib/ai/context/dependencies/config_files/base.rb b/ee/lib/ai/context/dependencies/config_files/base.rb
index 73f09c3fc226e..a7eb11ac654a7 100644
--- a/ee/lib/ai/context/dependencies/config_files/base.rb
+++ b/ee/lib/ai/context/dependencies/config_files/base.rb
@@ -36,6 +36,15 @@ def self.matches?(path)
             File.fnmatch?("**/#{file_name_glob}", path, File::FNM_PATHNAME | File::FNM_DOTMATCH)
           end
 
+          # Set to `true` if the dependency manager supports more than one config file
+          def self.supports_multiple_files?
+            false
+          end
+
+          def self.matching_paths(paths)
+            supports_multiple_files? ? paths.select { |p| matches?(p) } : Array.wrap(paths.find { |p| matches?(p) })
+          end
+
           def initialize(blob)
             @blob = blob
             @content = blob.data
diff --git a/ee/lib/ai/context/dependencies/config_files/constants.rb b/ee/lib/ai/context/dependencies/config_files/constants.rb
index 10a7602613a59..506c97cb6e65a 100644
--- a/ee/lib/ai/context/dependencies/config_files/constants.rb
+++ b/ee/lib/ai/context/dependencies/config_files/constants.rb
@@ -13,7 +13,7 @@ module Constants
           #    #supported-languages-and-package-managers.
           #
           # This ordering affects the result of
-          # ConfigFileParser#find_config_file_paths_with_class.
+          # ConfigFileParser#config_file_classes_by_path.
           #
           CONFIG_FILE_CLASSES = [
             ConfigFiles::CConanPy,
diff --git a/ee/lib/ai/context/dependencies/config_files/python_pip.rb b/ee/lib/ai/context/dependencies/config_files/python_pip.rb
index 0ffec846df13d..5de2d36ae411b 100644
--- a/ee/lib/ai/context/dependencies/config_files/python_pip.rb
+++ b/ee/lib/ai/context/dependencies/config_files/python_pip.rb
@@ -11,14 +11,20 @@ class PythonPip < Base
           COMMENT_ONLY_REGEX = /^#/
           INLINE_COMMENT_REGEX = /\s+#.*$/
 
+          # We support nested requirements files by processing all files matching
+          # this glob. See https://gitlab.com/gitlab-org/gitlab/-/issues/491800.
           def self.file_name_glob
-            'requirements.txt'
+            '*requirements*.txt'
           end
 
           def self.lang_name
             'Python'
           end
 
+          def self.supports_multiple_files?
+            true
+          end
+
           private
 
           ### Example format:
@@ -32,10 +38,8 @@ def self.lang_name
           # openpyxl == 3.1.2
           # urllib3 @ https://github.com/path/main.zip
           #
-          # # Nested requirement files currently not supported
-          # # TODO: https://gitlab.com/gitlab-org/gitlab/-/issues/491800
-          # -r other_requirements.txt
-          # # Other options
+          # # Options
+          # -r other_requirements.txt # A nested requirements file
           # -i https://pypi.org/simple
           # --python-version 3
           #
diff --git a/ee/spec/lib/ai/context/dependencies/config_file_parser_spec.rb b/ee/spec/lib/ai/context/dependencies/config_file_parser_spec.rb
index 5981c1fe8b339..741fa5bba67c7 100644
--- a/ee/spec/lib/ai/context/dependencies/config_file_parser_spec.rb
+++ b/ee/spec/lib/ai/context/dependencies/config_file_parser_spec.rb
@@ -57,7 +57,9 @@
             lang: 'go',
             valid: true,
             error_message: nil,
-            payload: a_hash_including(libs: [{ name: 'abc.org/mylib (1.3.0)' }, { name: 'golang.org/x/mod (0.5.0)' }])
+            payload: a_hash_including(
+              fileName: 'dir1/dir2/go.mod',
+              libs: [{ name: 'abc.org/mylib (1.3.0)' }, { name: 'golang.org/x/mod (0.5.0)' }])
           }
         )
       end
@@ -81,17 +83,124 @@
               lang: 'c',
               valid: true,
               error_message: nil,
-              payload: a_hash_including(libs: [{ name: 'libiconv (1.17)' }, { name: 'poco (>1.0,<1.9)' }])
+              payload: a_hash_including(
+                fileName: 'dir1/dir2/conanfile.txt',
+                libs: [{ name: 'libiconv (1.17)' }, { name: 'poco (>1.0,<1.9)' }])
             },
             {
               lang: 'cpp',
               valid: true,
               error_message: nil,
-              payload: a_hash_including(libs: [{ name: 'libiconv (1.17)' }, { name: 'poco (>1.0,<1.9)' }])
+              payload: a_hash_including(
+                fileName: 'dir1/dir2/conanfile.txt',
+                libs: [{ name: 'libiconv (1.17)' }, { name: 'poco (>1.0,<1.9)' }])
             }
           )
         end
       end
+
+      context 'with files matching multiple config file classes for the same language' do
+        let_it_be(:project) do
+          create(:project, :custom_repo, files:
+            {
+              'pom.xml' =>
+                <<~CONTENT,
+                  <project>
+                      <dependencies>
+                          <dependency>
+                              <groupId>org.junit.jupiter</groupId>
+                              <artifactId>junit-jupiter-engine</artifactId>
+                              <version>1.2.0</version>
+                          </dependency>
+                      </dependencies>
+                  </project>
+                CONTENT
+              'dir1/dir2/build.gradle' =>
+                <<~CONTENT
+                  dependencies {
+                      implementation 'org.codehaus.groovy:groovy:3.+'
+                      "implementation" 'org.ow2.asm:asm:9.6'
+                  }
+                CONTENT
+            })
+        end
+
+        it 'returns an object of only the first matching config file class in the order of `CONFIG_FILE_CLASSES`' do
+          expect(config_files_array).to contain_exactly(
+            {
+              lang: 'java',
+              valid: true,
+              error_message: nil,
+              payload: a_hash_including(
+                fileName: 'dir1/dir2/build.gradle',
+                libs: [{ name: 'groovy (3.+)' }, { name: 'asm (9.6)' }])
+            }
+          )
+        end
+      end
+
+      context 'with multiple files matching the same config file class' do
+        context 'when the config file class does not support multiple files' do
+          let_it_be(:project) do
+            create(:project, :custom_repo, files:
+              {
+                'go.mod' =>
+                  <<~CONTENT,
+                    require abc.org/mylib v1.3.0
+                  CONTENT
+                'dir1/dir2/go.mod' =>
+                  <<~CONTENT
+                    require golang.org/x/mod v0.5.0
+                  CONTENT
+              })
+          end
+
+          it 'returns a config file object for only one of the matching files' do
+            # We can't be sure which file is found first because it depends on the order of the worktree paths
+            expect(config_files_array.size).to eq(1)
+          end
+        end
+
+        context 'when the config file class supports multiple files' do
+          let_it_be(:project) do
+            create(:project, :custom_repo, files:
+              {
+                'requirements.txt' =>
+                  <<~CONTENT,
+                    requests>=2.0,<3.0
+                    numpy==1.26.4
+                    -r dir1/dir2/dev-requirements.txt
+                  CONTENT
+                'dir1/dir2/dev-requirements.txt' =>
+                  <<~CONTENT
+                    python_dateutil>=2.5.3
+                    fastapi-health!=0.3.0
+                  CONTENT
+              })
+          end
+
+          it 'returns a config file object for each matching file' do
+            expect(config_files_array).to contain_exactly(
+              {
+                lang: 'python',
+                valid: true,
+                error_message: nil,
+                payload: a_hash_including(
+                  fileName: 'requirements.txt',
+                  libs: [{ name: 'requests (>=2.0,<3.0)' }, { name: 'numpy (==1.26.4)' }])
+              },
+              {
+                lang: 'python',
+                valid: true,
+                error_message: nil,
+                payload: a_hash_including(
+                  fileName: 'dir1/dir2/dev-requirements.txt',
+                  libs: [{ name: 'python_dateutil (>=2.5.3)' }, { name: 'fastapi-health (!=0.3.0)' }])
+              }
+            )
+          end
+        end
+      end
     end
   end
 
diff --git a/ee/spec/lib/ai/context/dependencies/config_files/base_spec.rb b/ee/spec/lib/ai/context/dependencies/config_files/base_spec.rb
index 82ce0580cd2a3..e648292dbac9f 100644
--- a/ee/spec/lib/ai/context/dependencies/config_files/base_spec.rb
+++ b/ee/spec/lib/ai/context/dependencies/config_files/base_spec.rb
@@ -34,6 +34,7 @@ def extract_libs
     expect { described_class.file_name_glob }.to raise_error(NotImplementedError)
     expect { described_class.lang_name }.to raise_error(NotImplementedError)
     expect { described_class.new(blob).parse! }.to raise_error(NotImplementedError)
+    expect(described_class.supports_multiple_files?).to eq(false)
   end
 
   it 'returns the expected language value' do
@@ -133,4 +134,34 @@ def extract_libs
       end
     end
   end
+
+  describe '.matching_paths' do
+    let(:paths) { ['other.rb', 'dir/test.json', 'test.txt', 'test.json', 'README.md'] }
+
+    subject(:matching_paths) { config_file_class.matching_paths(paths) }
+
+    it 'returns the first matching path' do
+      expect(matching_paths).to contain_exactly('dir/test.json')
+    end
+
+    context 'when multiple files are supported' do
+      before do
+        stub_const('ConfigFileClass',
+          Class.new(described_class) do
+            def self.file_name_glob
+              'test.json'
+            end
+
+            def self.supports_multiple_files?
+              true
+            end
+          end
+        )
+      end
+
+      it 'returns all matching paths' do
+        expect(matching_paths).to contain_exactly('dir/test.json', 'test.json')
+      end
+    end
+  end
 end
diff --git a/ee/spec/lib/ai/context/dependencies/config_files/python_pip_spec.rb b/ee/spec/lib/ai/context/dependencies/config_files/python_pip_spec.rb
index f0209ab189d4c..5f1165630ecfe 100644
--- a/ee/spec/lib/ai/context/dependencies/config_files/python_pip_spec.rb
+++ b/ee/spec/lib/ai/context/dependencies/config_files/python_pip_spec.rb
@@ -7,6 +7,10 @@
     expect(described_class.lang).to eq('python')
   end
 
+  it 'supports multiple files' do
+    expect(described_class.supports_multiple_files?).to eq(true)
+  end
+
   it_behaves_like 'parsing a valid dependency config file' do
     let(:config_file_content) do
       <<~CONTENT
@@ -21,9 +25,8 @@
         urllib3 @ https://github.com/path/main.zip
         requests [security] >= 2.8.1, == 2.8.*
 
-        # Nested requirement files currently not supported
+        # Options
         -r other_requirements.txt
-        # Other options
         -i https://pypi.org/simple
         --python-version 3
         --no-clean
@@ -54,14 +57,15 @@
     using RSpec::Parameterized::TableSyntax
 
     where(:path, :matches) do
-      'requirements.txt'            | true
-      'dir/requirements.txt'        | true
-      'dir/subdir/requirements.txt' | true
-      'dir/requirements'            | false
-      'xrequirements.txt'           | false
-      'Requirements.txt'            | false
-      'requirements_txt'            | false
-      'requirements'                | false
+      'requirements.txt'                | true
+      'dir/requirements_other.txt'      | true
+      'dir/subdir/dev-requirements.txt' | true
+      'dir/requirements.c'              | false
+      'test_requirements.txt'           | true
+      'devrequirements.txt'             | true
+      'Requirements.txt'                | false
+      'requirements_txt'                | false
+      'requirements'                    | false
     end
 
     with_them do
-- 
GitLab