From dd453d1e5ee742f83e262e6bb060eac69697d15e Mon Sep 17 00:00:00 2001 From: Erick Bajao <fbajao@gitlab.com> Date: Wed, 17 May 2023 04:30:59 +0000 Subject: [PATCH] Add support for final store path prefix --- .../ci/job_artifacts/create_service.rb | 3 +- app/uploaders/object_storage.rb | 28 ++++- .../ci/job_artifacts/create_service_spec.rb | 6 +- spec/uploaders/object_storage_spec.rb | 104 ++++++++++-------- 4 files changed, 90 insertions(+), 51 deletions(-) diff --git a/app/services/ci/job_artifacts/create_service.rb b/app/services/ci/job_artifacts/create_service.rb index f7e04c594637..1647e9210923 100644 --- a/app/services/ci/job_artifacts/create_service.rb +++ b/app/services/ci/job_artifacts/create_service.rb @@ -26,7 +26,8 @@ def authorize(artifact_type:, filesize: nil) headers = JobArtifactUploader.workhorse_authorize( has_length: false, maximum_size: max_size(artifact_type), - use_final_store_path: Feature.enabled?(:ci_artifacts_upload_to_final_location, project) + use_final_store_path: Feature.enabled?(:ci_artifacts_upload_to_final_location, project), + final_store_path_root_id: project.id ) if lsif?(artifact_type) diff --git a/app/uploaders/object_storage.rb b/app/uploaders/object_storage.rb index 4188e0caa8ea..e1e103e20f88 100644 --- a/app/uploaders/object_storage.rb +++ b/app/uploaders/object_storage.rb @@ -11,6 +11,7 @@ module ObjectStorage RemoteStoreError = Class.new(StandardError) UnknownStoreError = Class.new(StandardError) ObjectStorageUnavailable = Class.new(StandardError) + MissingFinalStorePathRootId = Class.new(StandardError) class ExclusiveLeaseTaken < StandardError def initialize(lease_key) @@ -153,21 +154,30 @@ def generate_remote_id [CarrierWave.generate_cache_id, SecureRandom.hex].join('-') end - def generate_final_store_path + def generate_final_store_path(root_id:) hash = Digest::SHA2.hexdigest(SecureRandom.uuid) # We prefix '@final' to prevent clashes and make the files easily recognizable # as having been created by this code. - File.join('@final', hash[0..1], hash[2..3], hash[4..]) + sub_path = File.join('@final', hash[0..1], hash[2..3], hash[4..]) + + # We generate a hashed path of the root ID (e.g. Project ID) to distribute directories instead of + # filling up one root directory with a bunch of files. + Gitlab::HashedPath.new(sub_path, root_hash: root_id).to_s end - def workhorse_authorize(has_length:, maximum_size: nil, use_final_store_path: false) + def workhorse_authorize( + has_length:, + maximum_size: nil, + use_final_store_path: false, + final_store_path_root_id: nil) {}.tap do |hash| if self.direct_upload_to_object_store? hash[:RemoteObject] = workhorse_remote_upload_options( has_length: has_length, maximum_size: maximum_size, - use_final_store_path: use_final_store_path + use_final_store_path: use_final_store_path, + final_store_path_root_id: final_store_path_root_id ) else hash[:TempPath] = workhorse_local_upload_path @@ -190,11 +200,17 @@ def object_store_config ObjectStorage::Config.new(object_store_options) end - def workhorse_remote_upload_options(has_length:, maximum_size: nil, use_final_store_path: false) + def workhorse_remote_upload_options( + has_length:, + maximum_size: nil, + use_final_store_path: false, + final_store_path_root_id: nil) return unless direct_upload_to_object_store? if use_final_store_path - id = generate_final_store_path + raise MissingFinalStorePathRootId unless final_store_path_root_id.present? + + id = generate_final_store_path(root_id: final_store_path_root_id) upload_path = with_bucket_prefix(id) prepare_pending_direct_upload(id) else diff --git a/spec/services/ci/job_artifacts/create_service_spec.rb b/spec/services/ci/job_artifacts/create_service_spec.rb index f71d7feb04ac..73cb791f1451 100644 --- a/spec/services/ci/job_artifacts/create_service_spec.rb +++ b/spec/services/ci/job_artifacts/create_service_spec.rb @@ -82,7 +82,11 @@ before do stub_artifacts_object_storage(JobArtifactUploader, direct_upload: true) - allow(JobArtifactUploader).to receive(:generate_final_store_path).and_return(final_store_path) + + allow(JobArtifactUploader) + .to receive(:generate_final_store_path) + .with(root_id: project.id) + .and_return(final_store_path) end it 'includes the authorize headers' do diff --git a/spec/uploaders/object_storage_spec.rb b/spec/uploaders/object_storage_spec.rb index 1566021934a6..a748c544bfdc 100644 --- a/spec/uploaders/object_storage_spec.rb +++ b/spec/uploaders/object_storage_spec.rb @@ -525,12 +525,14 @@ def when_file_is_in_use let(:has_length) { true } let(:maximum_size) { nil } let(:use_final_store_path) { false } + let(:final_store_path_root_id) { nil } subject do uploader_class.workhorse_authorize( has_length: has_length, maximum_size: maximum_size, - use_final_store_path: use_final_store_path + use_final_store_path: use_final_store_path, + final_store_path_root_id: final_store_path_root_id ) end @@ -615,51 +617,30 @@ def when_file_is_in_use shared_examples 'handling object storage final upload path' do |multipart| context 'when use_final_store_path is true' do let(:use_final_store_path) { true } - let(:final_store_path) { File.join('@final', 'abc', '123', 'somefilename') } + let(:final_store_path_root_id) { 12345 } + let(:final_store_path) { File.join('@final', 'myprefix', 'abc', '123', 'somefilename') } let(:escaped_path) { escape_path(final_store_path) } - before do - stub_object_storage_multipart_init_with_final_store_path("#{storage_url}#{final_store_path}") if multipart - - allow(uploader_class).to receive(:generate_final_store_path).and_return(final_store_path) - end - - it 'uses the full path instead of the temporary one' do - expect(subject[:RemoteObject][:ID]).to eq(final_store_path) + context 'and final_store_path_root_id was not given' do + let(:final_store_path_root_id) { nil } - expect(subject[:RemoteObject][:GetURL]).to include(escaped_path) - expect(subject[:RemoteObject][:StoreURL]).to include(escaped_path) - - if multipart - expect(subject[:RemoteObject][:MultipartUpload][:PartURLs]).to all(include(escaped_path)) - expect(subject[:RemoteObject][:MultipartUpload][:CompleteURL]).to include(escaped_path) - expect(subject[:RemoteObject][:MultipartUpload][:AbortURL]).to include(escaped_path) + it 'raises an error' do + expect { subject }.to raise_error(ObjectStorage::MissingFinalStorePathRootId) end - - expect(subject[:RemoteObject][:SkipDelete]).to eq(true) - - expect( - ObjectStorage::PendingDirectUpload.exists?(uploader_class.storage_location_identifier, final_store_path) - ).to eq(true) end - context 'and bucket prefix is configured' do - let(:prefixed_final_store_path) { "my/prefix/#{final_store_path}" } - let(:escaped_path) { escape_path(prefixed_final_store_path) } - + context 'and final_store_path_root_id was given' do before do - allow(uploader_class.object_store_options).to receive(:bucket_prefix).and_return('my/prefix') + stub_object_storage_multipart_init_with_final_store_path("#{storage_url}#{final_store_path}") if multipart - if multipart - stub_object_storage_multipart_init_with_final_store_path("#{storage_url}#{prefixed_final_store_path}") - end + allow(uploader_class).to receive(:generate_final_store_path) + .with(root_id: final_store_path_root_id) + .and_return(final_store_path) end - it 'sets the remote object ID to the final path without prefix' do + it 'uses the full path instead of the temporary one' do expect(subject[:RemoteObject][:ID]).to eq(final_store_path) - end - it 'returns the final path with prefix' do expect(subject[:RemoteObject][:GetURL]).to include(escaped_path) expect(subject[:RemoteObject][:StoreURL]).to include(escaped_path) @@ -668,15 +649,49 @@ def when_file_is_in_use expect(subject[:RemoteObject][:MultipartUpload][:CompleteURL]).to include(escaped_path) expect(subject[:RemoteObject][:MultipartUpload][:AbortURL]).to include(escaped_path) end - end - it 'creates the pending upload entry without the prefix' do - is_expected.to have_key(:RemoteObject) + expect(subject[:RemoteObject][:SkipDelete]).to eq(true) expect( ObjectStorage::PendingDirectUpload.exists?(uploader_class.storage_location_identifier, final_store_path) ).to eq(true) end + + context 'and bucket prefix is configured' do + let(:prefixed_final_store_path) { "my/prefix/#{final_store_path}" } + let(:escaped_path) { escape_path(prefixed_final_store_path) } + + before do + allow(uploader_class.object_store_options).to receive(:bucket_prefix).and_return('my/prefix') + + if multipart + stub_object_storage_multipart_init_with_final_store_path("#{storage_url}#{prefixed_final_store_path}") + end + end + + it 'sets the remote object ID to the final path without prefix' do + expect(subject[:RemoteObject][:ID]).to eq(final_store_path) + end + + it 'returns the final path with prefix' do + expect(subject[:RemoteObject][:GetURL]).to include(escaped_path) + expect(subject[:RemoteObject][:StoreURL]).to include(escaped_path) + + if multipart + expect(subject[:RemoteObject][:MultipartUpload][:PartURLs]).to all(include(escaped_path)) + expect(subject[:RemoteObject][:MultipartUpload][:CompleteURL]).to include(escaped_path) + expect(subject[:RemoteObject][:MultipartUpload][:AbortURL]).to include(escaped_path) + end + end + + it 'creates the pending upload entry without the bucket prefix' do + is_expected.to have_key(:RemoteObject) + + expect( + ObjectStorage::PendingDirectUpload.exists?(uploader_class.storage_location_identifier, final_store_path) + ).to eq(true) + end + end end end @@ -716,7 +731,7 @@ def escape_path(path) end before do - expect_next_instance_of(ObjectStorage::Config) do |instance| + allow_next_instance_of(ObjectStorage::Config) do |instance| allow(instance).to receive(:credentials).and_return(credentials) end end @@ -767,7 +782,7 @@ def escape_path(path) end before do - expect_next_instance_of(ObjectStorage::Config) do |instance| + allow_next_instance_of(ObjectStorage::Config) do |instance| allow(instance).to receive(:credentials).and_return(credentials) end end @@ -812,7 +827,7 @@ def escape_path(path) end before do - expect_next_instance_of(ObjectStorage::Config) do |instance| + allow_next_instance_of(ObjectStorage::Config) do |instance| allow(instance).to receive(:credentials).and_return(credentials) end end @@ -1184,14 +1199,17 @@ def escape_path(path) end describe '.generate_final_store_path' do - subject(:final_path) { uploader_class.generate_final_store_path } + let(:root_id) { 12345 } + let(:expected_root_hashed_path) { Gitlab::HashedPath.new(root_hash: root_id) } + + subject(:final_path) { uploader_class.generate_final_store_path(root_id: root_id) } before do allow(Digest::SHA2).to receive(:hexdigest).and_return('somehash1234') end - it 'returns the generated hashed path' do - expect(final_path).to eq('@final/so/me/hash1234') + it 'returns the generated hashed path nested under the hashed path of the root ID' do + expect(final_path).to eq(File.join(expected_root_hashed_path, '@final/so/me/hash1234')) end end -- GitLab