From dd453d1e5ee742f83e262e6bb060eac69697d15e Mon Sep 17 00:00:00 2001
From: Erick Bajao <fbajao@gitlab.com>
Date: Wed, 17 May 2023 04:30:59 +0000
Subject: [PATCH] Add support for final store path prefix

---
 .../ci/job_artifacts/create_service.rb        |   3 +-
 app/uploaders/object_storage.rb               |  28 ++++-
 .../ci/job_artifacts/create_service_spec.rb   |   6 +-
 spec/uploaders/object_storage_spec.rb         | 104 ++++++++++--------
 4 files changed, 90 insertions(+), 51 deletions(-)

diff --git a/app/services/ci/job_artifacts/create_service.rb b/app/services/ci/job_artifacts/create_service.rb
index f7e04c594637..1647e9210923 100644
--- a/app/services/ci/job_artifacts/create_service.rb
+++ b/app/services/ci/job_artifacts/create_service.rb
@@ -26,7 +26,8 @@ def authorize(artifact_type:, filesize: nil)
         headers = JobArtifactUploader.workhorse_authorize(
           has_length: false,
           maximum_size: max_size(artifact_type),
-          use_final_store_path: Feature.enabled?(:ci_artifacts_upload_to_final_location, project)
+          use_final_store_path: Feature.enabled?(:ci_artifacts_upload_to_final_location, project),
+          final_store_path_root_id: project.id
         )
 
         if lsif?(artifact_type)
diff --git a/app/uploaders/object_storage.rb b/app/uploaders/object_storage.rb
index 4188e0caa8ea..e1e103e20f88 100644
--- a/app/uploaders/object_storage.rb
+++ b/app/uploaders/object_storage.rb
@@ -11,6 +11,7 @@ module ObjectStorage
   RemoteStoreError = Class.new(StandardError)
   UnknownStoreError = Class.new(StandardError)
   ObjectStorageUnavailable = Class.new(StandardError)
+  MissingFinalStorePathRootId = Class.new(StandardError)
 
   class ExclusiveLeaseTaken < StandardError
     def initialize(lease_key)
@@ -153,21 +154,30 @@ def generate_remote_id
         [CarrierWave.generate_cache_id, SecureRandom.hex].join('-')
       end
 
-      def generate_final_store_path
+      def generate_final_store_path(root_id:)
         hash = Digest::SHA2.hexdigest(SecureRandom.uuid)
 
         # We prefix '@final' to prevent clashes and make the files easily recognizable
         # as having been created by this code.
-        File.join('@final', hash[0..1], hash[2..3], hash[4..])
+        sub_path = File.join('@final', hash[0..1], hash[2..3], hash[4..])
+
+        # We generate a hashed path of the root ID (e.g. Project ID) to distribute directories instead of
+        # filling up one root directory with a bunch of files.
+        Gitlab::HashedPath.new(sub_path, root_hash: root_id).to_s
       end
 
-      def workhorse_authorize(has_length:, maximum_size: nil, use_final_store_path: false)
+      def workhorse_authorize(
+        has_length:,
+        maximum_size: nil,
+        use_final_store_path: false,
+        final_store_path_root_id: nil)
         {}.tap do |hash|
           if self.direct_upload_to_object_store?
             hash[:RemoteObject] = workhorse_remote_upload_options(
               has_length: has_length,
               maximum_size: maximum_size,
-              use_final_store_path: use_final_store_path
+              use_final_store_path: use_final_store_path,
+              final_store_path_root_id: final_store_path_root_id
             )
           else
             hash[:TempPath] = workhorse_local_upload_path
@@ -190,11 +200,17 @@ def object_store_config
         ObjectStorage::Config.new(object_store_options)
       end
 
-      def workhorse_remote_upload_options(has_length:, maximum_size: nil, use_final_store_path: false)
+      def workhorse_remote_upload_options(
+        has_length:,
+        maximum_size: nil,
+        use_final_store_path: false,
+        final_store_path_root_id: nil)
         return unless direct_upload_to_object_store?
 
         if use_final_store_path
-          id = generate_final_store_path
+          raise MissingFinalStorePathRootId unless final_store_path_root_id.present?
+
+          id = generate_final_store_path(root_id: final_store_path_root_id)
           upload_path = with_bucket_prefix(id)
           prepare_pending_direct_upload(id)
         else
diff --git a/spec/services/ci/job_artifacts/create_service_spec.rb b/spec/services/ci/job_artifacts/create_service_spec.rb
index f71d7feb04ac..73cb791f1451 100644
--- a/spec/services/ci/job_artifacts/create_service_spec.rb
+++ b/spec/services/ci/job_artifacts/create_service_spec.rb
@@ -82,7 +82,11 @@
 
         before do
           stub_artifacts_object_storage(JobArtifactUploader, direct_upload: true)
-          allow(JobArtifactUploader).to receive(:generate_final_store_path).and_return(final_store_path)
+
+          allow(JobArtifactUploader)
+            .to receive(:generate_final_store_path)
+            .with(root_id: project.id)
+            .and_return(final_store_path)
         end
 
         it 'includes the authorize headers' do
diff --git a/spec/uploaders/object_storage_spec.rb b/spec/uploaders/object_storage_spec.rb
index 1566021934a6..a748c544bfdc 100644
--- a/spec/uploaders/object_storage_spec.rb
+++ b/spec/uploaders/object_storage_spec.rb
@@ -525,12 +525,14 @@ def when_file_is_in_use
     let(:has_length) { true }
     let(:maximum_size) { nil }
     let(:use_final_store_path) { false }
+    let(:final_store_path_root_id) { nil }
 
     subject do
       uploader_class.workhorse_authorize(
         has_length: has_length,
         maximum_size: maximum_size,
-        use_final_store_path: use_final_store_path
+        use_final_store_path: use_final_store_path,
+        final_store_path_root_id: final_store_path_root_id
       )
     end
 
@@ -615,51 +617,30 @@ def when_file_is_in_use
     shared_examples 'handling object storage final upload path' do |multipart|
       context 'when use_final_store_path is true' do
         let(:use_final_store_path) { true }
-        let(:final_store_path) { File.join('@final', 'abc', '123', 'somefilename') }
+        let(:final_store_path_root_id) { 12345 }
+        let(:final_store_path) { File.join('@final', 'myprefix', 'abc', '123', 'somefilename') }
         let(:escaped_path) { escape_path(final_store_path) }
 
-        before do
-          stub_object_storage_multipart_init_with_final_store_path("#{storage_url}#{final_store_path}") if multipart
-
-          allow(uploader_class).to receive(:generate_final_store_path).and_return(final_store_path)
-        end
-
-        it 'uses the full path instead of the temporary one' do
-          expect(subject[:RemoteObject][:ID]).to eq(final_store_path)
+        context 'and final_store_path_root_id was not given' do
+          let(:final_store_path_root_id) { nil }
 
-          expect(subject[:RemoteObject][:GetURL]).to include(escaped_path)
-          expect(subject[:RemoteObject][:StoreURL]).to include(escaped_path)
-
-          if multipart
-            expect(subject[:RemoteObject][:MultipartUpload][:PartURLs]).to all(include(escaped_path))
-            expect(subject[:RemoteObject][:MultipartUpload][:CompleteURL]).to include(escaped_path)
-            expect(subject[:RemoteObject][:MultipartUpload][:AbortURL]).to include(escaped_path)
+          it 'raises an error' do
+            expect { subject }.to raise_error(ObjectStorage::MissingFinalStorePathRootId)
           end
-
-          expect(subject[:RemoteObject][:SkipDelete]).to eq(true)
-
-          expect(
-            ObjectStorage::PendingDirectUpload.exists?(uploader_class.storage_location_identifier, final_store_path)
-          ).to eq(true)
         end
 
-        context 'and bucket prefix is configured' do
-          let(:prefixed_final_store_path) { "my/prefix/#{final_store_path}" }
-          let(:escaped_path) { escape_path(prefixed_final_store_path) }
-
+        context 'and final_store_path_root_id was given' do
           before do
-            allow(uploader_class.object_store_options).to receive(:bucket_prefix).and_return('my/prefix')
+            stub_object_storage_multipart_init_with_final_store_path("#{storage_url}#{final_store_path}") if multipart
 
-            if multipart
-              stub_object_storage_multipart_init_with_final_store_path("#{storage_url}#{prefixed_final_store_path}")
-            end
+            allow(uploader_class).to receive(:generate_final_store_path)
+              .with(root_id: final_store_path_root_id)
+              .and_return(final_store_path)
           end
 
-          it 'sets the remote object ID to the final path without prefix' do
+          it 'uses the full path instead of the temporary one' do
             expect(subject[:RemoteObject][:ID]).to eq(final_store_path)
-          end
 
-          it 'returns the final path with prefix' do
             expect(subject[:RemoteObject][:GetURL]).to include(escaped_path)
             expect(subject[:RemoteObject][:StoreURL]).to include(escaped_path)
 
@@ -668,15 +649,49 @@ def when_file_is_in_use
               expect(subject[:RemoteObject][:MultipartUpload][:CompleteURL]).to include(escaped_path)
               expect(subject[:RemoteObject][:MultipartUpload][:AbortURL]).to include(escaped_path)
             end
-          end
 
-          it 'creates the pending upload entry without the prefix' do
-            is_expected.to have_key(:RemoteObject)
+            expect(subject[:RemoteObject][:SkipDelete]).to eq(true)
 
             expect(
               ObjectStorage::PendingDirectUpload.exists?(uploader_class.storage_location_identifier, final_store_path)
             ).to eq(true)
           end
+
+          context 'and bucket prefix is configured' do
+            let(:prefixed_final_store_path) { "my/prefix/#{final_store_path}" }
+            let(:escaped_path) { escape_path(prefixed_final_store_path) }
+
+            before do
+              allow(uploader_class.object_store_options).to receive(:bucket_prefix).and_return('my/prefix')
+
+              if multipart
+                stub_object_storage_multipart_init_with_final_store_path("#{storage_url}#{prefixed_final_store_path}")
+              end
+            end
+
+            it 'sets the remote object ID to the final path without prefix' do
+              expect(subject[:RemoteObject][:ID]).to eq(final_store_path)
+            end
+
+            it 'returns the final path with prefix' do
+              expect(subject[:RemoteObject][:GetURL]).to include(escaped_path)
+              expect(subject[:RemoteObject][:StoreURL]).to include(escaped_path)
+
+              if multipart
+                expect(subject[:RemoteObject][:MultipartUpload][:PartURLs]).to all(include(escaped_path))
+                expect(subject[:RemoteObject][:MultipartUpload][:CompleteURL]).to include(escaped_path)
+                expect(subject[:RemoteObject][:MultipartUpload][:AbortURL]).to include(escaped_path)
+              end
+            end
+
+            it 'creates the pending upload entry without the bucket prefix' do
+              is_expected.to have_key(:RemoteObject)
+
+              expect(
+                ObjectStorage::PendingDirectUpload.exists?(uploader_class.storage_location_identifier, final_store_path)
+              ).to eq(true)
+            end
+          end
         end
       end
 
@@ -716,7 +731,7 @@ def escape_path(path)
           end
 
           before do
-            expect_next_instance_of(ObjectStorage::Config) do |instance|
+            allow_next_instance_of(ObjectStorage::Config) do |instance|
               allow(instance).to receive(:credentials).and_return(credentials)
             end
           end
@@ -767,7 +782,7 @@ def escape_path(path)
           end
 
           before do
-            expect_next_instance_of(ObjectStorage::Config) do |instance|
+            allow_next_instance_of(ObjectStorage::Config) do |instance|
               allow(instance).to receive(:credentials).and_return(credentials)
             end
           end
@@ -812,7 +827,7 @@ def escape_path(path)
           end
 
           before do
-            expect_next_instance_of(ObjectStorage::Config) do |instance|
+            allow_next_instance_of(ObjectStorage::Config) do |instance|
               allow(instance).to receive(:credentials).and_return(credentials)
             end
           end
@@ -1184,14 +1199,17 @@ def escape_path(path)
   end
 
   describe '.generate_final_store_path' do
-    subject(:final_path) { uploader_class.generate_final_store_path }
+    let(:root_id) { 12345 }
+    let(:expected_root_hashed_path) { Gitlab::HashedPath.new(root_hash: root_id) }
+
+    subject(:final_path) { uploader_class.generate_final_store_path(root_id: root_id) }
 
     before do
       allow(Digest::SHA2).to receive(:hexdigest).and_return('somehash1234')
     end
 
-    it 'returns the generated hashed path' do
-      expect(final_path).to eq('@final/so/me/hash1234')
+    it 'returns the generated hashed path nested under the hashed path of the root ID' do
+      expect(final_path).to eq(File.join(expected_root_hashed_path, '@final/so/me/hash1234'))
     end
   end
 
-- 
GitLab