Unverified · Commit 981cb56c · authored by drew stachon · committed by GitLab

Merge branch 'eb-orphan-clean-up-memory-fix' into 'master'

@@ -4,8 +4,11 @@ module Gitlab
   module Cleanup
     module OrphanJobArtifactFinalObjects
       class BatchFromList
+        include Gitlab::Utils::StrongMemoize
         include StorageHelpers
 
+        GOOGLE_PROVIDER = 'google'
+
         def initialize(entries, logger: Gitlab::AppLogger)
           @entries = entries
           @logger = logger
@@ -50,8 +53,9 @@ def each_fog_file
           entries.each do |entry|
             # NOTE: If the object store is configured to use bucket prefix, the GenerateList task
             # would have included the bucket_prefix in paths in the orphans list CSV.
-            path_with_bucket_prefix, _ = entry.split(',')
-            fog_file = artifacts_directory.files.get(path_with_bucket_prefix)
+            path_with_bucket_prefix, size = entry.split(',')
+
+            fog_file = build_fog_file(path_with_bucket_prefix, size)
 
             if fog_file
               yield fog_file
@@ -61,6 +65,23 @@ def each_fog_file
           end
         end
 
+        def build_fog_file(path, size)
+          if google_provider?
+            # For Google provider, we support rollback of deletions, thus we need to fetch
+            # the `generation` attribute of the object before we delete it.
+            # Here we use `metadata` instead of `get` because we only want to get the metadata and
+            # not download the object content. Note that `files.metadata` is only available in `fog-google`.
+            artifacts_directory.files.metadata(path)
+          else
+            artifacts_directory.files.new(key: path, content_length: size)
+          end
+        end
+
+        def google_provider?
+          configuration.connection.provider.downcase == GOOGLE_PROVIDER
+        end
+        strong_memoize_attr :google_provider?
+
         def path_without_bucket_prefix(path)
           # `path` contains the fog file's key. It is the object path relative to the artifacts bucket, for example:
           # aa/bb/abc123/@final/12/34/def12345
......
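For context on the change above: `files.get` issues a GET request and loads the object, body included, into memory before it is immediately discarded, which is the overhead the `eb-orphan-clean-up-memory-fix` branch removes. A minimal sketch of the difference, assuming a plain `fog-aws` connection and a hypothetical `artifacts` bucket; the credentials, key, and size are placeholders:

    require 'fog/aws'

    connection = Fog::Storage.new(
      provider: 'AWS',
      aws_access_key_id: 'placeholder-id',        # placeholder credentials
      aws_secret_access_key: 'placeholder-secret'
    )
    directory = connection.directories.new(key: 'artifacts')

    # Old behavior: one GET per CSV entry, downloading the object content
    # just to obtain a handle that is only used for deletion.
    file = directory.files.get('aa/bb/abc123/@final/12/34/def12345')

    # New behavior for non-Google providers: build a local Fog model from
    # the key and size already recorded in the CSV; no API request is made
    # until the model is used (e.g. file.destroy).
    file = directory.files.new(
      key: 'aa/bb/abc123/@final/12/34/def12345',
      content_length: 107 # size column from the orphans list CSV
    )

For the Google provider the batch calls `files.metadata(path)` instead, a `fog-google`-only call that fetches the object's attributes, including the `generation` needed for deletion rollback, without downloading its content. `strong_memoize_attr :google_provider?` then caches the provider check once per batch, even when the result is false.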
@@ -48,7 +48,26 @@
   subject(:orphan_objects) { batch.orphan_objects }
 
   shared_examples_for 'returning orphan final job artifact objects' do
-    it 'returns all existing orphan Fog files from the given CSV entries' do
+    context 'when the configured object storage provider is google' do
+      before do
+        allow(Gitlab.config.artifacts.object_store.connection).to receive(:provider).and_return('Google')
+
+        # Given Fog doesn't have mock implementation for Google provider, we can only
+        # check that the metadata method is properly called with the correct parameters.
+        allow(batch).to receive_message_chain(:artifacts_directory, :files, :metadata)
+          .with(orphan_final_object_1.key).and_return(orphan_final_object_1)
+        allow(batch).to receive_message_chain(:artifacts_directory, :files, :metadata)
+          .with(orphan_final_object_2.key).and_return(orphan_final_object_2)
+        allow(batch).to receive_message_chain(:artifacts_directory, :files, :metadata)
+          .with(non_existent_object.key).and_return(nil)
+        allow(batch).to receive_message_chain(:artifacts_directory, :files, :metadata)
+          .with(object_with_job_artifact_record.key).and_return(object_with_job_artifact_record)
+      end
+
+      it 'only returns existing orphan Fog files from the given CSV entries' do
         expect(orphan_objects).to contain_exactly(orphan_final_object_1, orphan_final_object_2)
 
         expect_skipping_non_existent_object_log_message(non_existent_object)
@@ -56,6 +75,15 @@
       end
     end
 
+    context 'when the configured object storage provider is not google' do
+      it 'returns all orphan Fog files from the given CSV entries' do
+        expect(orphan_objects).to contain_exactly(orphan_final_object_1, orphan_final_object_2, non_existent_object)
+
+        expect_skipping_object_with_job_artifact_record_log_message(object_with_job_artifact_record)
+      end
+    end
+  end
 
   context 'when not configured to use bucket_prefix' do
     let(:remote_directory) { 'artifacts' }
     let(:bucket_prefix) { nil }
......
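A note on the stubbing approach above: `fog-aws` ships an in-memory Mock mode, while `fog-google` does not, so the Google path can only be verified by stubbing the `files.metadata` call. A rough illustration of the asymmetry, again with placeholder credentials and keys:

    require 'fog/aws'

    Fog.mock!

    connection = Fog::Storage.new(
      provider: 'AWS',
      aws_access_key_id: 'placeholder-id',
      aws_secret_access_key: 'placeholder-secret'
    )

    # Under Fog.mock! these calls run entirely in memory, which is why the
    # AWS behavior can be exercised end-to-end in these specs. There is no
    # equivalent mock for the Google provider, hence the
    # receive_message_chain stubs in the before block above.
    directory = connection.directories.create(key: 'artifacts')
    directory.files.create(key: 'aa/bb/abc123/@final/12/34/def12345', body: 'dummy')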
@@ -128,13 +128,16 @@
         run
       end
 
-      it 'does not fail but does not log the non-existent path to the deleted list' do
-        expect_no_deleted_object_log_message(orphan_final_object_1)
+      # NOTE: This behavior doesn't apply to GCP but we can't add a test here because
+      # fog doesn't have a mock implementation for it. We can only test AWS behavior.
+      it 'does not fail and still logs the non-existent path to the deleted list' do
+        expect_deleted_object_log_message(orphan_final_object_1)
         expect_deleted_object_log_message(orphan_final_object_2)
         expect_deleted_object_log_message(orphan_final_object_3)
         expect_deleted_object_log_message(orphan_final_object_4)
 
         expect_deleted_list_to_contain_exactly(deleted_list_filename, [
+          orphan_final_object_1,
           orphan_final_object_2,
           orphan_final_object_3,
           orphan_final_object_4
@@ -208,9 +211,6 @@
           orphan_final_object_2
         ])
 
-        expect_deleted_object_log_message(orphan_final_object_1)
-        expect_deleted_object_log_message(orphan_final_object_2)
-
         new_processor = described_class.new(
           force_restart: true,
           filename: orphan_list_filename
@@ -219,11 +219,16 @@
         new_processor.run!
 
         expect_no_resuming_from_marker_log_message
+        expect_deleted_object_log_message(orphan_final_object_1, times: 2)
+        expect_deleted_object_log_message(orphan_final_object_2, times: 2)
         expect_deleted_object_log_message(orphan_final_object_3)
         expect_deleted_object_log_message(orphan_final_object_4)
 
-        # The previous objects that were deleted in the 1st run shouldn't appear here anymore
+        # The previous objects that were deleted in the 1st run will still appear here
+        # because this is AWS behavior. But for GCP provider, they shouldn't anymore.
         expect_deleted_list_to_contain_exactly(deleted_list_filename, [
+          orphan_final_object_1,
+          orphan_final_object_2,
           orphan_final_object_3,
           orphan_final_object_4
         ])
......
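The expectation changes in this last spec follow from provider delete semantics rather than from the processor itself: S3 treats object deletion as idempotent and returns 204 even when the key no longer exists, so a locally built Fog model still "deletes" successfully on a re-run and the path is logged as deleted again, whereas GCS returns 404 for a missing object. A sketch of that asymmetry, reusing the hypothetical `directory` from the earlier example:

    # Against S3 this succeeds (HTTP 204 No Content) even if the object was
    # already removed in a previous run, so the processor logs it as deleted
    # again -- the AWS behavior the spec comment refers to.
    directory.files.new(key: 'aa/bb/abc123/@final/12/34/def12345').destroy

    # Against GCS the equivalent delete of a missing object fails with
    # 404 Not Found, so the path would not be re-logged as deleted.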