From a47e4a01f9afd3d0f1137b9d3da9300191b9fd3a Mon Sep 17 00:00:00 2001
From: Erick Bajao <fbajao@gitlab.com>
Date: Wed, 18 Nov 2020 23:40:42 +0800
Subject: [PATCH] Implement smart cobertura class path correction

As we parse the cobertura XML, based on the given project full path and
pipeline worktree paths, we will make some assumptions on how to
determine the correct path of filenames that are not relative to the
project root.
---
 app/models/ci/build.rb                        |  14 +-
 app/models/ci/pipeline.rb                     |   2 +-
 .../eb-cobertura-background-fix.yml           |   5 +
 .../development/smart_cobertura_parser.yml    |   8 +
 lib/gitlab/ci/parsers/coverage/cobertura.rb   | 118 ++-
 spec/factories/ci/job_artifacts.rb            |  10 +
 ..._paths_not_relative_to_project_root.xml.gz | Bin 0 -> 530 bytes
 .../ci/parsers/coverage/cobertura_spec.rb     | 749 ++++++++++++++----
 spec/models/ci/build_spec.rb                  |  29 +-
 spec/models/ci/pipeline_spec.rb               |  10 +
 .../pipelines/create_artifact_service_spec.rb |   3 +-
 11 files changed, 794 insertions(+), 154 deletions(-)
 create mode 100644 changelogs/unreleased/eb-cobertura-background-fix.yml
 create mode 100644 config/feature_flags/development/smart_cobertura_parser.yml
 create mode 100644 spec/fixtures/cobertura/coverage_with_paths_not_relative_to_project_root.xml.gz

diff --git a/app/models/ci/build.rb b/app/models/ci/build.rb
index 4b1299c7aeec6..9babae4175112 100644
--- a/app/models/ci/build.rb
+++ b/app/models/ci/build.rb
@@ -916,8 +916,20 @@ def collect_accessibility_reports!(accessibility_report)
     end
 
     def collect_coverage_reports!(coverage_report)
+      project_path, worktree_paths = if Feature.enabled?(:smart_cobertura_parser, project)
+                                       # If the flag is disabled, we intentionally pass nil
+                                       # for both project_path and worktree_paths to fallback
+                                       # to the non-smart behavior of the parser
+                                       [project.full_path, pipeline.all_worktree_paths]
+                                     end
+
       each_report(Ci::JobArtifact::COVERAGE_REPORT_FILE_TYPES) do |file_type, blob|
-        Gitlab::Ci::Parsers.fabricate!(file_type).parse!(blob, coverage_report)
+        Gitlab::Ci::Parsers.fabricate!(file_type).parse!(
+          blob,
+          coverage_report,
+          project_path: project_path,
+          worktree_paths: worktree_paths
+        )
       end
 
       coverage_report
diff --git a/app/models/ci/pipeline.rb b/app/models/ci/pipeline.rb
index 99bc894b81290..a55f9d41df0ff 100644
--- a/app/models/ci/pipeline.rb
+++ b/app/models/ci/pipeline.rb
@@ -975,7 +975,7 @@ def accessibility_reports
 
     def coverage_reports
       Gitlab::Ci::Reports::CoverageReports.new.tap do |coverage_reports|
-        latest_report_builds(Ci::JobArtifact.coverage_reports).each do |build|
+        latest_report_builds(Ci::JobArtifact.coverage_reports).includes(:project).find_each do |build|
           build.collect_coverage_reports!(coverage_reports)
         end
       end
diff --git a/changelogs/unreleased/eb-cobertura-background-fix.yml b/changelogs/unreleased/eb-cobertura-background-fix.yml
new file mode 100644
index 0000000000000..5f679d6b54fb4
--- /dev/null
+++ b/changelogs/unreleased/eb-cobertura-background-fix.yml
@@ -0,0 +1,5 @@
+---
+title: Implement smart cobertura class path correction
+merge_request: 48048
+author:
+type: changed
diff --git a/config/feature_flags/development/smart_cobertura_parser.yml b/config/feature_flags/development/smart_cobertura_parser.yml
new file mode 100644
index 0000000000000..a3aa182e412b3
--- /dev/null
+++ b/config/feature_flags/development/smart_cobertura_parser.yml
@@ -0,0 +1,8 @@
+---
+name: smart_cobertura_parser
+introduced_by_url: https://gitlab.com/gitlab-org/gitlab/-/merge_requests/48048
+rollout_issue_url: https://gitlab.com/gitlab-org/gitlab/-/issues/284822
+milestone: '13.7'
+type: development
+group: group::testing
+default_enabled: false
diff --git a/lib/gitlab/ci/parsers/coverage/cobertura.rb b/lib/gitlab/ci/parsers/coverage/cobertura.rb
index 934c797580c35..1edcbac2f2547 100644
--- a/lib/gitlab/ci/parsers/coverage/cobertura.rb
+++ b/lib/gitlab/ci/parsers/coverage/cobertura.rb
@@ -5,50 +5,113 @@ module Ci
     module Parsers
       module Coverage
         class Cobertura
-          CoberturaParserError = Class.new(Gitlab::Ci::Parsers::ParserError)
+          InvalidXMLError = Class.new(Gitlab::Ci::Parsers::ParserError)
+          InvalidLineInformationError = Class.new(Gitlab::Ci::Parsers::ParserError)
 
-          def parse!(xml_data, coverage_report)
+          GO_SOURCE_PATTERN = '/usr/local/go/src'
+          MAX_SOURCES = 100
+
+          def parse!(xml_data, coverage_report, project_path: nil, worktree_paths: nil)
             root = Hash.from_xml(xml_data)
 
-            parse_all(root, coverage_report)
+            context = {
+              project_path: project_path,
+              paths: worktree_paths&.to_set,
+              sources: []
+            }
+
+            parse_all(root, coverage_report, context)
           rescue Nokogiri::XML::SyntaxError
-            raise CoberturaParserError, "XML parsing failed"
-          rescue
-            raise CoberturaParserError, "Cobertura parsing failed"
+            raise InvalidXMLError, "XML parsing failed"
           end
 
           private
 
-          def parse_all(root, coverage_report)
+          def parse_all(root, coverage_report, context)
             return unless root.present?
 
             root.each do |key, value|
-              parse_node(key, value, coverage_report)
+              parse_node(key, value, coverage_report, context)
             end
           end
 
-          def parse_node(key, value, coverage_report)
-            return if key == 'sources'
-
-            if key == 'class'
+          def parse_node(key, value, coverage_report, context)
+            if key == 'sources' && value['source'].present?
+              parse_sources(value['source'], context)
+            elsif key == 'package'
               Array.wrap(value).each do |item|
-                parse_class(item, coverage_report)
+                parse_package(item, coverage_report, context)
+              end
+            elsif key == 'class'
+              # This means the cobertura XML does not have classes within package nodes.
+              # This is possible in some cases like in simple JS project structures
+              # running Jest.
+              Array.wrap(value).each do |item|
+                parse_class(item, coverage_report, context)
               end
             elsif value.is_a?(Hash)
-              parse_all(value, coverage_report)
+              parse_all(value, coverage_report, context)
             elsif value.is_a?(Array)
               value.each do |item|
-                parse_all(item, coverage_report)
+                parse_all(item, coverage_report, context)
               end
             end
           end
 
-          def parse_class(file, coverage_report)
+          def parse_sources(sources, context)
+            return unless context[:project_path] && context[:paths]
+
+            sources = Array.wrap(sources)
+
+            # TODO: Go cobertura has a different format with how their packages
+            # are included in the filename. So we can't rely on the sources.
+            # We'll deal with this later.
+            return if sources.include?(GO_SOURCE_PATTERN)
+
+            sources.each do |source|
+              source = build_source_path(source, context)
+              context[:sources] << source if source.present?
+            end
+          end
+
+          def build_source_path(source, context)
+            # | raw source                  | extracted  |
+            # |-----------------------------|------------|
+            # | /builds/foo/test/SampleLib/ | SampleLib/ |
+            # | /builds/foo/test/something  | something  |
+            # | /builds/foo/test/           | nil        |
+            # | /builds/foo/test            | nil        |
+            source.split("#{context[:project_path]}/", 2)[1]
+          end
+
+          def parse_package(package, coverage_report, context)
+            classes = package.dig('classes', 'class')
+            return unless classes.present?
+
+            matched_filenames = Array.wrap(classes).map do |item|
+              parse_class(item, coverage_report, context)
+            end
+
+            # Remove these filenames from the paths to avoid conflict
+            # with other packages that may contain the same class filenames
+            remove_matched_filenames(matched_filenames, context)
+          end
+
+          def remove_matched_filenames(filenames, context)
+            return unless context[:paths]
+
+            filenames.each { |f| context[:paths].delete(f) }
+          end
+
+          def parse_class(file, coverage_report, context)
             return unless file["filename"].present? && file["lines"].present?
 
             parsed_lines = parse_lines(file["lines"])
+            filename = determine_filename(file["filename"], context)
+
+            coverage_report.add_file(filename, Hash[parsed_lines]) if filename
 
-            coverage_report.add_file(file["filename"], Hash[parsed_lines])
+            filename
           end
 
           def parse_lines(lines)
@@ -58,6 +121,27 @@ def parse_lines(lines)
               # Using `Integer()` here to raise exception on invalid values
               [Integer(line["number"]), Integer(line["hits"])]
             end
+          rescue
+            raise InvalidLineInformationError, "Line information had invalid values"
+          end
+
+          def determine_filename(filename, context)
+            return filename unless context[:sources].any?
+
+            full_filename = nil
+
+            context[:sources].each_with_index do |source, index|
+              break if index >= MAX_SOURCES
+              break if full_filename = check_source(source, filename, context)
+            end
+
+            full_filename
+          end
+
+          def check_source(source, filename, context)
+            full_path = File.join(source, filename)
+
+            return full_path if context[:paths].include?(full_path)
           end
         end
       end
diff --git a/spec/factories/ci/job_artifacts.rb b/spec/factories/ci/job_artifacts.rb
index 0f5ad013a6462..ad98e9d1f2420 100644
--- a/spec/factories/ci/job_artifacts.rb
+++ b/spec/factories/ci/job_artifacts.rb
@@ -229,6 +229,16 @@
       end
     end
 
+    trait :coverage_with_paths_not_relative_to_project_root do
+      file_type { :cobertura }
+      file_format { :gzip }
+
+      after(:build) do |artifact, evaluator|
+        artifact.file = fixture_file_upload(
+          Rails.root.join('spec/fixtures/cobertura/coverage_with_paths_not_relative_to_project_root.xml.gz'), 'application/x-gzip')
+      end
+    end
+
     trait :coverage_with_corrupted_data do
       file_type { :cobertura }
       file_format { :gzip }
diff --git a/spec/fixtures/cobertura/coverage_with_paths_not_relative_to_project_root.xml.gz b/spec/fixtures/cobertura/coverage_with_paths_not_relative_to_project_root.xml.gz
new file mode 100644
index 0000000000000000000000000000000000000000..c4adc63fccef72f67f431e80a6aa9fce2f2263f9
GIT binary patch
literal 530
zcmV+t0`2`DiwFpUHML&=17mM?WpZI>WnXt`bZB32VRUG7Uv6)7Uvgz^VRUJBWnXk}
zUvP47YGq?|Uvh76bS`*pYyh=XOK%e~5I#4*!u6qoLp-~Iw$M1~0eS<0l!DZwvpd;!
z@!FB?gz)d{eR%T_s<bHh<avGbeWS7Y{jO4Ei`Ge_XEbFAy`PWx<>zmo9-qEHkj#Xb
zD3C0k7LN~K2`#-}-vMN1iPmqdV9so8hAU$WWEwpLJcw1}<UPxMPUoW$?~#<$IJUy$
zj3(^n<WRD-LT6={Q1fXD+_<U7d`2hHvlU7P`(3O#OiB)%U0!fyrdli9N&lyV?uAcS
z%2G<atk8K;twa3!V|{KuBx!mb6kPCDsSDS5pTlw^RqntV<KbCsMUIsLXEUgT)QyKj
zmm3gZ55d<W`xVB~!gU7Gq6(@5O@=#Bb7c{f4xihp+Pc#P&y;YkZ5ypu+Oi)GEhQ_d
zuup<6X)m}=YD&(<GBB&qmnQFc_I5}0O8KeO(q9#RN{N$&7GW7f;LY2gM;w|KuQ%;m
zh9=Eo^=4}my{W>oH;YzE?*c@GoY9p~4k_#r;0S_4OZ)FCN>s|6(En185tPscEWDMv
znEp3Cx98ULTWoQdoaBsh(n-{l{Hva5#`tl=!P_u++=B%j$>Gqv=JuJx!8_;B{V&ZQ
UwEuW`|549JFL?;^N(lx40CF4#*Z=?k

literal 0
HcmV?d00001

diff --git a/spec/lib/gitlab/ci/parsers/coverage/cobertura_spec.rb b/spec/lib/gitlab/ci/parsers/coverage/cobertura_spec.rb
index 45e87466532b7..2313378d1e9f0 100644
--- a/spec/lib/gitlab/ci/parsers/coverage/cobertura_spec.rb
+++ b/spec/lib/gitlab/ci/parsers/coverage/cobertura_spec.rb
@@ -4,207 +4,690 @@
 
 RSpec.describe Gitlab::Ci::Parsers::Coverage::Cobertura do
   describe '#parse!' do
-    subject { described_class.new.parse!(cobertura, coverage_report) }
+    subject(:parse_report) { described_class.new.parse!(cobertura, coverage_report, project_path: project_path, worktree_paths: paths) }
 
     let(:coverage_report) { Gitlab::Ci::Reports::CoverageReports.new }
+    let(:project_path) { 'foo/bar' }
+    let(:paths) { ['app/user.rb'] }
+
+    let(:cobertura) do
+      <<~EOF
+      <coverage>
+        #{sources_xml}
+        #{classes_xml}
+      </coverage>
+      EOF
+    end
 
     context 'when data is Cobertura style XML' do
-      context 'when there is no <class>' do
-        let(:cobertura) { '' }
+      shared_examples_for 'ignoring sources, project_path, and worktree_paths' do
+        context 'when there is no <class>' do
+          let(:classes_xml) { '' }
 
-        it 'parses XML and returns empty coverage' do
-          expect { subject }.not_to raise_error
+          it 'parses XML and returns empty coverage' do
+            expect { parse_report }.not_to raise_error
 
-          expect(coverage_report.files).to eq({})
+            expect(coverage_report.files).to eq({})
+          end
         end
-      end
 
-      context 'when there is a <sources>' do
-        shared_examples_for 'ignoring sources' do
-          it 'parses XML without errors' do
-            expect { subject }.not_to raise_error
+        context 'when there is a single <class>' do
+          context 'with no lines' do
+            let(:classes_xml) do
+              <<~EOF
+              <packages><package name="app"><classes>
+                <class filename="app.rb"></class>
+              </classes></package></packages>
+              EOF
+            end
+
+            it 'parses XML and returns empty coverage' do
+              expect { parse_report }.not_to raise_error
+
+              expect(coverage_report.files).to eq({})
+            end
+          end
 
-            expect(coverage_report.files).to eq({})
+          context 'with a single line' do
+            let(:classes_xml) do
+              <<~EOF
+              <packages><package name="app"><classes>
+                <class filename="app.rb"><lines>
+                  <line number="1" hits="2"/>
+                </lines></class>
+              </classes></package></packages>
+              EOF
+            end
+
+            it 'parses XML and returns a single file with coverage' do
+              expect { parse_report }.not_to raise_error
+
+              expect(coverage_report.files).to eq({ 'app.rb' => { 1 => 2 } })
+            end
+          end
+
+          context 'without a package parent' do
+            let(:classes_xml) do
+              <<~EOF
+              <packages>
+                <class filename="app.rb"><lines>
+                  <line number="1" hits="2"/>
+                </lines></class>
+              </packages>
+              EOF
+            end
+
+            it 'parses XML and returns a single file with coverage' do
+              expect { parse_report }.not_to raise_error
+
+              expect(coverage_report.files).to eq({ 'app.rb' => { 1 => 2 } })
+            end
+          end
+
+          context 'with multiple lines and methods info' do
+            let(:classes_xml) do
+              <<~EOF
+              <packages><package name="app"><classes>
+                <class filename="app.rb"><methods/><lines>
+                  <line number="1" hits="2"/>
+                  <line number="2" hits="0"/>
+                </lines></class>
+              </classes></package></packages>
+              EOF
+            end
+
+            it 'parses XML and returns a single file with coverage' do
+              expect { parse_report }.not_to raise_error
+
+              expect(coverage_report.files).to eq({ 'app.rb' => { 1 => 2, 2 => 0 } })
+            end
           end
         end
 
-        context 'and has a single source' do
-          let(:cobertura) do
-            <<-EOF.strip_heredoc
+        context 'when there are multiple <class>' do
+          context 'without a package parent' do
+            let(:classes_xml) do
+              <<~EOF
+              <packages>
+                <class filename="app.rb"><methods/><lines>
+                  <line number="1" hits="2"/>
+                </lines></class>
+                <class filename="foo.rb"><methods/><lines>
+                  <line number="6" hits="1"/>
+                </lines></class>
+              </packages>
+              EOF
+            end
+
+            it 'parses XML and returns coverage information per class' do
+              expect { parse_report }.not_to raise_error
+
+              expect(coverage_report.files).to eq({ 'app.rb' => { 1 => 2 }, 'foo.rb' => { 6 => 1 } })
+            end
+          end
+
+          context 'with the same filename and different lines' do
+            let(:classes_xml) do
+              <<~EOF
+              <packages><package name="app"><classes>
+                <class filename="app.rb"><methods/><lines>
+                  <line number="1" hits="2"/>
+                  <line number="2" hits="0"/>
+                </lines></class>
+                <class filename="app.rb"><methods/><lines>
+                  <line number="6" hits="1"/>
+                  <line number="7" hits="1"/>
+                </lines></class>
+              </classes></package></packages>
+              EOF
+            end
+
+            it 'parses XML and returns a single file with merged coverage' do
+              expect { parse_report }.not_to raise_error
+
+              expect(coverage_report.files).to eq({ 'app.rb' => { 1 => 2, 2 => 0, 6 => 1, 7 => 1 } })
+            end
+          end
+
+          context 'with the same filename and lines' do
+            let(:classes_xml) do
+              <<~EOF
+              <packages><package name="app"><classes>
+                <class filename="app.rb"><methods/><lines>
+                  <line number="1" hits="2"/>
+                  <line number="2" hits="0"/>
+                </lines></class>
+                <class filename="app.rb"><methods/><lines>
+                  <line number="1" hits="1"/>
+                  <line number="2" hits="1"/>
+                </lines></class>
+              </classes></package></packages>
+              EOF
+            end
+
+            it 'parses XML and returns a single file with summed-up coverage' do
+              expect { parse_report }.not_to raise_error
+
+              expect(coverage_report.files).to eq({ 'app.rb' => { 1 => 3, 2 => 1 } })
+            end
+          end
+
+          context 'with missing filename' do
+            let(:classes_xml) do
+              <<~EOF
+              <packages><package name="app"><classes>
+                <class filename="app.rb"><methods/><lines>
+                  <line number="1" hits="2"/>
+                  <line number="2" hits="0"/>
+                </lines></class>
+                <class><methods/><lines>
+                  <line number="6" hits="1"/>
+                  <line number="7" hits="1"/>
+                </lines></class>
+              </classes></package></packages>
+              EOF
+            end
+
+            it 'parses XML and ignores class with missing name' do
+              expect { parse_report }.not_to raise_error
+
+              expect(coverage_report.files).to eq({ 'app.rb' => { 1 => 2, 2 => 0 } })
+            end
+          end
+
+          context 'with invalid line information' do
+            let(:classes_xml) do
+              <<~EOF
+              <packages><package name="app"><classes>
+                <class filename="app.rb"><methods/><lines>
+                  <line number="1" hits="2"/>
+                  <line number="2" hits="0"/>
+                </lines></class>
+                <class filename="app.rb"><methods/><lines>
+                  <line null="test" hits="1"/>
+                  <line number="7" hits="1"/>
+                </lines></class>
+              </classes></package></packages>
+              EOF
+            end
+
+            it 'raises an error' do
+              expect { parse_report }.to raise_error(described_class::InvalidLineInformationError)
+            end
+          end
+        end
+      end
+
+      context 'when there is no <sources>' do
+        let(:sources_xml) { '' }
+
+        it_behaves_like 'ignoring sources, project_path, and worktree_paths'
+      end
+
+      context 'when there is a <sources>' do
+        context 'and has a single source with a pattern for Go projects' do
+          let(:project_path) { 'local/go' } # Make sure we're not making false positives
+          let(:sources_xml) do
+            <<~EOF
             <sources>
-              <source>project/src</source>
+              <source>/usr/local/go/src</source>
             </sources>
             EOF
           end
 
-          it_behaves_like 'ignoring sources'
+          it_behaves_like 'ignoring sources, project_path, and worktree_paths'
         end
 
-        context 'and has multiple sources' do
-          let(:cobertura) do
-            <<-EOF.strip_heredoc
+        context 'and has multiple sources with a pattern for Go projects' do
+          let(:project_path) { 'local/go' } # Make sure we're not making false positives
+          let(:sources_xml) do
+            <<~EOF
             <sources>
-              <source>project/src/foo</source>
-              <source>project/src/bar</source>
+              <source>/usr/local/go/src</source>
+              <source>/go/src</source>
             </sources>
             EOF
           end
 
-          it_behaves_like 'ignoring sources'
+          it_behaves_like 'ignoring sources, project_path, and worktree_paths'
         end
-      end
 
-      context 'when there is a single <class>' do
-        context 'with no lines' do
-          let(:cobertura) do
-            <<-EOF.strip_heredoc
-            <classes><class filename="app.rb"></class></classes>
+        context 'and has a single source but already is at the project root path' do
+          let(:sources_xml) do
+            <<~EOF
+            <sources>
+              <source>builds/#{project_path}</source>
+            </sources>
             EOF
           end
 
-          it 'parses XML and returns empty coverage' do
-            expect { subject }.not_to raise_error
+          it_behaves_like 'ignoring sources, project_path, and worktree_paths'
+        end
 
-            expect(coverage_report.files).to eq({})
+        context 'and has multiple sources but already are at the project root path' do
+          let(:sources_xml) do
+            <<~EOF
+            <sources>
+              <source>builds/#{project_path}/</source>
+              <source>builds/somewhere/#{project_path}</source>
+            </sources>
+            EOF
           end
+
+          it_behaves_like 'ignoring sources, project_path, and worktree_paths'
         end
 
-        context 'with a single line' do
-          let(:cobertura) do
-            <<-EOF.strip_heredoc
-            <classes>
-              <class filename="app.rb"><lines>
-                <line number="1" hits="2"/>
-              </lines></class>
-            </classes>
+        context 'and has a single source that is not at the project root path' do
+          let(:sources_xml) do
+            <<~EOF
+            <sources>
+              <source>builds/#{project_path}/app</source>
+            </sources>
             EOF
           end
 
-          it 'parses XML and returns a single file with coverage' do
-            expect { subject }.not_to raise_error
+          context 'when there is no <class>' do
+            let(:classes_xml) { '' }
 
-            expect(coverage_report.files).to eq({ 'app.rb' => { 1 => 2 } })
-          end
-        end
+            it 'parses XML and returns empty coverage' do
+              expect { parse_report }.not_to raise_error
 
-        context 'with multipe lines and methods info' do
-          let(:cobertura) do
-            <<-EOF.strip_heredoc
-            <classes>
-              <class filename="app.rb"><methods/><lines>
-                <line number="1" hits="2"/>
-                <line number="2" hits="0"/>
-              </lines></class>
-            </classes>
-            EOF
+              expect(coverage_report.files).to eq({})
+            end
           end
 
-          it 'parses XML and returns a single file with coverage' do
-            expect { subject }.not_to raise_error
+          context 'when there is a single <class>' do
+            context 'with no lines' do
+              let(:classes_xml) do
+                <<~EOF
+                <packages><package name="app"><classes>
+                  <class filename="user.rb"></class>
+                </classes></package></packages>
+                EOF
+              end
+
+              it 'parses XML and returns empty coverage' do
+                expect { parse_report }.not_to raise_error
+
+                expect(coverage_report.files).to eq({})
+              end
+            end
+
+            context 'with a single line but the filename cannot be determined based on extracted source and worktree paths' do
+              let(:classes_xml) do
+                <<~EOF
+                <packages><package name="app"><classes>
+                  <class filename="member.rb"><lines>
+                    <line number="1" hits="2"/>
+                  </lines></class>
+                </classes></package></packages>
+                EOF
+              end
+
+              it 'parses XML and returns empty coverage' do
+                expect { parse_report }.not_to raise_error
+
+                expect(coverage_report.files).to eq({})
+              end
+            end
+
+            context 'with a single line' do
+              let(:classes_xml) do
+                <<~EOF
+                <packages><package name="app"><classes>
+                  <class filename="user.rb"><lines>
+                    <line number="1" hits="2"/>
+                  </lines></class>
+                </classes></package></packages>
+                EOF
+              end
+
+              it 'parses XML and returns a single file with the filename relative to project root' do
+                expect { parse_report }.not_to raise_error
+
+                expect(coverage_report.files).to eq({ 'app/user.rb' => { 1 => 2 } })
+              end
+            end
+
+            context 'with multiple lines and methods info' do
+              let(:classes_xml) do
+                <<~EOF
+                <packages><package name="app"><classes>
+                  <class filename="user.rb"><methods/><lines>
+                    <line number="1" hits="2"/>
+                    <line number="2" hits="0"/>
+                  </lines></class>
+                </classes></package></packages>
+                EOF
+              end
+
+              it 'parses XML and returns a single file with the filename relative to project root' do
+                expect { parse_report }.not_to raise_error
+
+                expect(coverage_report.files).to eq({ 'app/user.rb' => { 1 => 2, 2 => 0 } })
+              end
+            end
+          end
 
-            expect(coverage_report.files).to eq({ 'app.rb' => { 1 => 2, 2 => 0 } })
+          context 'when there are multiple <class>' do
+            context 'with the same filename but the filename cannot be determined based on extracted source and worktree paths' do
+              let(:classes_xml) do
+                <<~EOF
+                <packages><package name="app"><classes>
+                  <class filename="member.rb"><methods/><lines>
+                    <line number="1" hits="2"/>
+                    <line number="2" hits="0"/>
+                  </lines></class>
+                  <class filename="member.rb"><methods/><lines>
+                    <line number="6" hits="1"/>
+                    <line number="7" hits="1"/>
+                  </lines></class>
+                </classes></package></packages>
+                EOF
+              end
+
+              it 'parses XML and returns empty coverage' do
+                expect { parse_report }.not_to raise_error
+
+                expect(coverage_report.files).to eq({})
+              end
+            end
+
+            context 'without a parent package' do
+              let(:classes_xml) do
+                <<~EOF
+                <packages>
+                  <class filename="user.rb"><methods/><lines>
+                    <line number="1" hits="2"/>
+                    <line number="2" hits="0"/>
+                  </lines></class>
+                  <class filename="user.rb"><methods/><lines>
+                    <line number="6" hits="1"/>
+                    <line number="7" hits="1"/>
+                  </lines></class>
+                </packages>
+                EOF
+              end
+
+              it 'parses XML and returns coverage information with the filename relative to project root' do
+                expect { parse_report }.not_to raise_error
+
+                expect(coverage_report.files).to eq({ 'app/user.rb' => { 1 => 2, 2 => 0, 6 => 1, 7 => 1 } })
+              end
+            end
+
+            context 'with the same filename and different lines' do
+              let(:classes_xml) do
+                <<~EOF
+                <packages><package name="app"><classes>
+                  <class filename="user.rb"><methods/><lines>
+                    <line number="1" hits="2"/>
+                    <line number="2" hits="0"/>
+                  </lines></class>
+                  <class filename="user.rb"><methods/><lines>
+                    <line number="6" hits="1"/>
+                    <line number="7" hits="1"/>
+                  </lines></class>
+                </classes></package></packages>
+                EOF
+              end
+
+              it 'parses XML and returns a single file with merged coverage, and with the filename relative to project root' do
+                expect { parse_report }.not_to raise_error
+
+                expect(coverage_report.files).to eq({ 'app/user.rb' => { 1 => 2, 2 => 0, 6 => 1, 7 => 1 } })
+              end
+            end
+
+            context 'with the same filename and lines' do
+              let(:classes_xml) do
+                <<~EOF
+                <packages><package name="app"><classes>
+                  <class filename="user.rb"><methods/><lines>
+                    <line number="1" hits="2"/>
+                    <line number="2" hits="0"/>
+                  </lines></class>
+                  <class filename="user.rb"><methods/><lines>
+                    <line number="1" hits="1"/>
+                    <line number="2" hits="1"/>
+                  </lines></class>
+                </classes></package></packages>
+                EOF
+              end
+
+              it 'parses XML and returns a single file with summed-up coverage, and with the filename relative to project root' do
+                expect { parse_report }.not_to raise_error
+
+                expect(coverage_report.files).to eq({ 'app/user.rb' => { 1 => 3, 2 => 1 } })
+              end
+            end
+
+            context 'with missing filename' do
+              let(:classes_xml) do
+                <<~EOF
+                <packages><package name="app"><classes>
+                  <class filename="user.rb"><methods/><lines>
+                    <line number="1" hits="2"/>
+                    <line number="2" hits="0"/>
+                  </lines></class>
+                  <class><methods/><lines>
+                    <line number="6" hits="1"/>
+                    <line number="7" hits="1"/>
+                  </lines></class>
+                </classes></package></packages>
+                EOF
+              end
+
+              it 'parses XML and ignores class with missing name' do
+                expect { parse_report }.not_to raise_error
+
+                expect(coverage_report.files).to eq({ 'app/user.rb' => { 1 => 2, 2 => 0 } })
+              end
+            end
+
+            context 'with filename that cannot be determined based on extracted source and worktree paths' do
+              let(:classes_xml) do
+                <<~EOF
+                <packages><package name="app"><classes>
+                  <class filename="user.rb"><methods/><lines>
+                    <line number="1" hits="2"/>
+                    <line number="2" hits="0"/>
+                  </lines></class>
+                  <class filename="member.rb"><methods/><lines>
+                    <line number="6" hits="1"/>
+                    <line number="7" hits="1"/>
+                  </lines></class>
+                </classes></package></packages>
+                EOF
+              end
+
+              it 'parses XML and ignores class with undetermined filename' do
+                expect { parse_report }.not_to raise_error
+
+                expect(coverage_report.files).to eq({ 'app/user.rb' => { 1 => 2, 2 => 0 } })
+              end
+            end
+
+            context 'with invalid line information' do
+              let(:classes_xml) do
+                <<~EOF
+                <packages><package name="app"><classes>
+                  <class filename="user.rb"><methods/><lines>
+                    <line number="1" hits="2"/>
+                    <line number="2" hits="0"/>
+                  </lines></class>
+                  <class filename="user.rb"><methods/><lines>
+                    <line null="test" hits="1"/>
+                    <line number="7" hits="1"/>
+                  </lines></class>
+                </classes></package></packages>
+                EOF
+              end
+
+              it 'raises an error' do
+                expect { parse_report }.to raise_error(described_class::InvalidLineInformationError)
+              end
+            end
           end
         end
-      end
 
-      context 'when there are multipe <class>' do
-        context 'with the same filename and different lines' do
-          let(:cobertura) do
-            <<-EOF.strip_heredoc
-            <classes>
-              <class filename="app.rb"><methods/><lines>
-                <line number="1" hits="2"/>
-                <line number="2" hits="0"/>
-              </lines></class>
-              <class filename="app.rb"><methods/><lines>
-                <line number="6" hits="1"/>
-                <line number="7" hits="1"/>
-              </lines></class>
-            </classes>
+        context 'and has multiple sources that are not at the project root path' do
+          let(:sources_xml) do
+            <<~EOF
+            <sources>
+              <source>builds/#{project_path}/app1/</source>
+              <source>builds/#{project_path}/app2/</source>
+            </sources>
             EOF
           end
 
-          it 'parses XML and returns a single file with merged coverage' do
-            expect { subject }.not_to raise_error
-
-            expect(coverage_report.files).to eq({ 'app.rb' => { 1 => 2, 2 => 0, 6 => 1, 7 => 1 } })
+          context 'and a class filename is available under multiple extracted sources' do
+            let(:paths) { ['app1/user.rb', 'app2/user.rb'] }
+
+            let(:classes_xml) do
+              <<~EOF
+              <package name="app1">
+                <classes>
+                  <class filename="user.rb"><lines>
+                    <line number="1" hits="2"/>
+                  </lines></class>
+                </classes>
+              </package>
+              <package name="app2">
+                <classes>
+                  <class filename="user.rb"><lines>
+                    <line number="2" hits="3"/>
+                  </lines></class>
+                </classes>
+              </package>
+              EOF
+            end
+
+            it 'parses XML and returns the files with the filename relative to project root' do
+              expect { parse_report }.not_to raise_error
+
+              expect(coverage_report.files).to eq({
+                'app1/user.rb' => { 1 => 2 },
+                'app2/user.rb' => { 2 => 3 }
+              })
+            end
           end
-        end
 
-        context 'with the same filename and lines' do
-          let(:cobertura) do
-            <<-EOF.strip_heredoc
-            <packages><package><classes>
-              <class filename="app.rb"><methods/><lines>
-                <line number="1" hits="2"/>
-                <line number="2" hits="0"/>
-              </lines></class>
-              <class filename="app.rb"><methods/><lines>
-                <line number="1" hits="1"/>
-                <line number="2" hits="1"/>
-              </lines></class>
-            </classes></package></packages>
-            EOF
+          context 'and a class filename is available under one of the extracted sources' do
+            let(:paths) { ['app1/member.rb', 'app2/user.rb', 'app2/pet.rb'] }
+
+            let(:classes_xml) do
+              <<~EOF
+              <packages><package name="app"><classes>
+                <class filename="user.rb"><lines>
+                  <line number="1" hits="2"/>
+                </lines></class>
+              </classes></package></packages>
+              EOF
+            end
+
+            it 'parses XML and returns a single file with the filename relative to project root using the extracted source where it is first found under' do
+              expect { parse_report }.not_to raise_error
+
+              expect(coverage_report.files).to eq({ 'app2/user.rb' => { 1 => 2 } })
+            end
           end
 
-          it 'parses XML and returns a single file with summed-up coverage' do
-            expect { subject }.not_to raise_error
+          context 'and a class filename is not found under any of the extracted sources' do
+            let(:paths) { ['app1/member.rb', 'app2/pet.rb'] }
 
-            expect(coverage_report.files).to eq({ 'app.rb' => { 1 => 3, 2 => 1 } })
+            let(:classes_xml) do
+              <<~EOF
+              <packages><package name="app"><classes>
+                <class filename="user.rb"><lines>
+                  <line number="1" hits="2"/>
+                </lines></class>
+              </classes></package></packages>
+              EOF
+            end
+
+            it 'parses XML and returns empty coverage' do
+              expect { parse_report }.not_to raise_error
+
+              expect(coverage_report.files).to eq({})
+            end
           end
-        end
 
-        context 'with missing filename' do
-          let(:cobertura) do
-            <<-EOF.strip_heredoc
-            <classes>
-              <class filename="app.rb"><methods/><lines>
-                <line number="1" hits="2"/>
-                <line number="2" hits="0"/>
-              </lines></class>
-              <class><methods/><lines>
-                <line number="6" hits="1"/>
-                <line number="7" hits="1"/>
-              </lines></class>
-            </classes>
-            EOF
+          context 'and a class filename is not found under any of the extracted sources within the iteratable limit' do
+            let(:paths) { ['app2/user.rb'] }
+
+            let(:classes_xml) do
+              <<~EOF
+              <packages><package name="app"><classes>
+                <class filename="record.rb"><lines>
+                  <line number="1" hits="2"/>
+                </lines></class>
+                <class filename="user.rb"><lines>
+                  <line number="1" hits="2"/>
+                </lines></class>
+              </classes></package></packages>
+              EOF
+            end
+
+            before do
+              stub_const("#{described_class}::MAX_SOURCES", 1)
+            end
+
+            it 'parses XML and returns empty coverage' do
+              expect { parse_report }.not_to raise_error
+
+              expect(coverage_report.files).to eq({})
+            end
           end
+        end
+      end
 
-          it 'parses XML and ignores class with missing name' do
-            expect { subject }.not_to raise_error
+      shared_examples_for 'non-smart parsing' do
+        let(:sources_xml) do
+          <<~EOF
+          <sources>
+            <source>builds/foo/bar/app</source>
+          </sources>
+          EOF
+        end
 
-            expect(coverage_report.files).to eq({ 'app.rb' => { 1 => 2, 2 => 0 } })
-          end
+        let(:classes_xml) do
+          <<~EOF
+          <packages><package name="app"><classes>
+            <class filename="user.rb"><lines>
+              <line number="1" hits="2"/>
+            </lines></class>
+          </classes></package></packages>
+          EOF
         end
 
-        context 'with invalid line information' do
-          let(:cobertura) do
-            <<-EOF.strip_heredoc
-            <classes>
-              <class filename="app.rb"><methods/><lines>
-                <line number="1" hits="2"/>
-                <line number="2" hits="0"/>
-              </lines></class>
-              <class filename="app.rb"><methods/><lines>
-                <line null="test" hits="1"/>
-                <line number="7" hits="1"/>
-              </lines></class>
-            </classes>
-            EOF
-          end
+        it 'parses XML and returns filenames unchanged just as how they are found in the class node' do
+          expect { parse_report }.not_to raise_error
 
-          it 'raises an error' do
-            expect { subject }.to raise_error(described_class::CoberturaParserError)
-          end
+          expect(coverage_report.files).to eq({ 'user.rb' => { 1 => 2 } })
         end
       end
+
+      context 'when project_path is not present' do
+        let(:project_path) { nil }
+        let(:paths) { ['app/user.rb'] }
+
+        it_behaves_like 'non-smart parsing'
+      end
+
+      context 'when worktree_paths is not present' do
+        let(:project_path) { 'foo/bar' }
+        let(:paths) { nil }
+
+        it_behaves_like 'non-smart parsing'
+      end
     end
 
     context 'when data is not Cobertura style XML' do
       let(:cobertura) { { coverage: '12%' }.to_json }
 
       it 'raises an error' do
-        expect { subject }.to raise_error(described_class::CoberturaParserError)
+        expect { parse_report }.to raise_error(described_class::InvalidXMLError)
       end
     end
   end
diff --git a/spec/models/ci/build_spec.rb b/spec/models/ci/build_spec.rb
index 0efb014fdfcb1..ade78efe23984 100644
--- a/spec/models/ci/build_spec.rb
+++ b/spec/models/ci/build_spec.rb
@@ -4059,13 +4059,40 @@ def run_job_without_exception
         end
       end
 
+      context 'when there is a Cobertura coverage report with class filename paths not relative to project root' do
+        before do
+          allow(build.project).to receive(:full_path).and_return('root/javademo')
+          allow(build.pipeline).to receive(:all_worktree_paths).and_return(['src/main/java/com/example/javademo/User.java'])
+
+          create(:ci_job_artifact, :coverage_with_paths_not_relative_to_project_root, job: build, project: build.project)
+        end
+
+        it 'parses blobs and add the results to the coverage report with corrected paths' do
+          expect { subject }.not_to raise_error
+
+          expect(coverage_report.files.keys).to match_array(['src/main/java/com/example/javademo/User.java'])
+        end
+
+        context 'and smart_cobertura_parser feature flag is disabled' do
+          before do
+            stub_feature_flags(smart_cobertura_parser: false)
+          end
+
+          it 'parses blobs and add the results to the coverage report with unmodified paths' do
+            expect { subject }.not_to raise_error
+
+            expect(coverage_report.files.keys).to match_array(['com/example/javademo/User.java'])
+          end
+        end
+      end
+
       context 'when there is a corrupted Cobertura coverage report' do
         before do
           create(:ci_job_artifact, :coverage_with_corrupted_data, job: build, project: build.project)
         end
 
         it 'raises an error' do
-          expect { subject }.to raise_error(Gitlab::Ci::Parsers::Coverage::Cobertura::CoberturaParserError)
+          expect { subject }.to raise_error(Gitlab::Ci::Parsers::Coverage::Cobertura::InvalidLineInformationError)
         end
       end
     end
diff --git a/spec/models/ci/pipeline_spec.rb b/spec/models/ci/pipeline_spec.rb
index 05387f06746bc..f50307ba04320 100644
--- a/spec/models/ci/pipeline_spec.rb
+++ b/spec/models/ci/pipeline_spec.rb
@@ -3426,6 +3426,16 @@ def create_build(name, stage_idx)
         ])
       end
 
+      it 'does not execute N+1 queries' do
+        single_build_pipeline = create(:ci_empty_pipeline, status: :created, project: project)
+        single_rspec = create(:ci_build, :success, name: 'rspec', pipeline: single_build_pipeline, project: project)
+        create(:ci_job_artifact, :cobertura, job: single_rspec, project: project)
+
+        control = ActiveRecord::QueryRecorder.new { single_build_pipeline.coverage_reports }
+
+        expect { subject }.not_to exceed_query_limit(control)
+      end
+
       context 'when builds are retried' do
         let!(:build_rspec) { create(:ci_build, :retried, :success, name: 'rspec', pipeline: pipeline, project: project) }
         let!(:build_golang) { create(:ci_build, :retried, :success, name: 'golang', pipeline: pipeline, project: project) }
diff --git a/spec/services/ci/pipelines/create_artifact_service_spec.rb b/spec/services/ci/pipelines/create_artifact_service_spec.rb
index 6f177889ed3f8..4e9248d9d1a28 100644
--- a/spec/services/ci/pipelines/create_artifact_service_spec.rb
+++ b/spec/services/ci/pipelines/create_artifact_service_spec.rb
@@ -7,7 +7,8 @@
     subject { described_class.new.execute(pipeline) }
 
     context 'when pipeline has coverage reports' do
-      let(:pipeline) { create(:ci_pipeline, :with_coverage_reports) }
+      let(:project) { create(:project, :repository) }
+      let(:pipeline) { create(:ci_pipeline, :with_coverage_reports, project: project) }
 
       context 'when pipeline is finished' do
         it 'creates a pipeline artifact' do
-- 
GitLab