diff --git a/ee/app/models/concerns/geo/verifiable_replicator.rb b/ee/app/models/concerns/geo/verifiable_replicator.rb index 7512813f6c1b7b12a9ca33c4b1fc26e3dfeed603..d87ae9b08aad8faedda78e8e8c570708b952e694 100644 --- a/ee/app/models/concerns/geo/verifiable_replicator.rb +++ b/ee/app/models/concerns/geo/verifiable_replicator.rb @@ -313,6 +313,23 @@ def ok_to_skip_download? resource_exists? && skip_download_can_rely_on_verification? end + # Downloading replicators need to verify the state of the resource and its association + # prior to attempting downloads to avoid triggering exceptions. Call this method + # to trigger validation and receive an error detail message string if any issues are present. + # + # Override this method in subclasses to provide more context-specific error details + # + # NOTE: we've kept this implementation pretty simple so far for the sake of + # detecting problems with replicating Upload and other blob data types. + # If we find later that we need to add this more extensively, or detect + # multiple errors (ala ActiveRecord models), we should revisit and refactor + # this implementation + # + # @return [String|nil] an error message String if validation fails, nil if validation succeeds + def predownload_validation_failure + nil + end + # @abstract # @return [String] a checksum representing the data def calculate_checksum diff --git a/ee/app/replicators/geo/upload_replicator.rb b/ee/app/replicators/geo/upload_replicator.rb index bc1a29cefcb8af62cee7c90eea49c483f8f098e5..71b5933b1e489935b988e26931bad81d7b88672e 100644 --- a/ee/app/replicators/geo/upload_replicator.rb +++ b/ee/app/replicators/geo/upload_replicator.rb @@ -17,5 +17,21 @@ def self.verification_feature_flag_enabled? def carrierwave_uploader model_record.retrieve_uploader end + + # Do not allow download unless the upload's owner `model` is present. + # Otherwise, attempting to build file paths will raise an exception. + def predownload_validation_failure + error_message = super + return error_message if error_message + + upload = model_record + + unless upload.model.present? + missing_model = "#{upload.model_type} with ID #{upload.model_id}" + return "The model which owns Upload with ID #{upload.id} is missing: #{missing_model}" + end + + nil + end end end diff --git a/ee/app/services/geo/blob_download_service.rb b/ee/app/services/geo/blob_download_service.rb index 88b402145d517ac4446df4a7c433fd48dcf91875..40f7885182fee1a98679c019d1508d2122862f86 100644 --- a/ee/app/services/geo/blob_download_service.rb +++ b/ee/app/services/geo/blob_download_service.rb @@ -25,33 +25,37 @@ def initialize(replicator:) def execute try_obtain_lease do start_time = Time.current + sync_successful = false registry.start! - download_result = ::Gitlab::Geo::Replication::BlobDownloader.new(replicator: @replicator).execute - - mark_as_synced = download_result.success - - if mark_as_synced - registry.synced! - else - message = download_result.reason - error = download_result.extra_details&.delete(:error) - - if error - Gitlab::ErrorTracking.track_exception( - error, - replicable_name: @replicator.replicable_name, - model_record_id: @replicator.model_record_id - ) + begin + downloader = ::Gitlab::Geo::Replication::BlobDownloader.new(replicator: @replicator) + download_result = downloader.execute + + sync_successful = process_download_result(download_result) + rescue StandardError => error + # if an exception raises here, it will be stuck in "started" state until + # the cleanup process forces it to failed much later. + # To avoid that, catch the error, mark sync as + # failed, and re-raise the exception here. + registry.failed!( + message: "Error while attempting to sync", + error: error + ) + track_exception(error) + + raise error + ensure + # make sure we're not stuck in a started state still + if registry.started? + sync_successful ? registry.synced! : registry.failed!(message: "Unknown system error") end - - registry.failed!(message: message, error: error, missing_on_primary: download_result.primary_missing_file) end - log_download(mark_as_synced, download_result, start_time) + log_download(sync_successful, download_result, start_time) - !!mark_as_synced + sync_successful end end @@ -61,6 +65,22 @@ def registry @registry ||= @replicator.registry end + def process_download_result(download_result) + if download_result.success + registry.synced! + return true + end + + message = download_result.reason + error = download_result.extra_details&.delete(:error) + + track_exception(error) if error + + registry.failed!(message: message, error: error, missing_on_primary: download_result.primary_missing_file) + + false + end + def log_download(mark_as_synced, download_result, start_time) metadata = { replicable_name: @replicator.replicable_name, @@ -77,6 +97,14 @@ def log_download(mark_as_synced, download_result, start_time) log_info("Blob download", metadata) end + def track_exception(exception) + Gitlab::ErrorTracking.track_exception( + exception, + replicable_name: @replicator.replicable_name, + model_record_id: @replicator.model_record_id + ) + end + def lease_key @lease_key ||= "#{self.class.name.underscore}:#{@replicator.replicable_name}:#{@replicator.model_record.id}" end diff --git a/ee/lib/gitlab/geo/replication/blob_downloader.rb b/ee/lib/gitlab/geo/replication/blob_downloader.rb index 3b652d5d1006050bcae6f7c43a2428e321c87079..9adc36d160729d2006f60a1b1e26aefcbbfcf541 100644 --- a/ee/lib/gitlab/geo/replication/blob_downloader.rb +++ b/ee/lib/gitlab/geo/replication/blob_downloader.rb @@ -86,6 +86,11 @@ def check_preconditions return failure_result(reason: 'Skipping transfer as there is no Primary node to download from') end + predownload_error_message = replicator.predownload_validation_failure + if predownload_error_message.present? + return failure_result(reason: "Skipping transfer due to validation error: #{predownload_error_message}") + end + if file_storage? if File.directory?(absolute_path) return failure_result(reason: 'Skipping transfer as destination exist and is a directory') diff --git a/ee/spec/replicators/geo/upload_replicator_spec.rb b/ee/spec/replicators/geo/upload_replicator_spec.rb index 8930d639631c08d132124297c47068da61e1fe02..4fcc519fea36d48e19d746f595cc374c1d794ad4 100644 --- a/ee/spec/replicators/geo/upload_replicator_spec.rb +++ b/ee/spec/replicators/geo/upload_replicator_spec.rb @@ -6,4 +6,29 @@ let(:model_record) { create(:upload, :with_file) } include_examples 'a blob replicator' + + describe "predownload_validation_failure" do + context "when upload is valid and has an associated model/owner" do + it "returns nil" do + expect(replicator.predownload_validation_failure).to be_nil + end + end + + context "when upload is orphaned from its own model association" do + before do + # break the model association on the upload + model_record.model_id = -1 + model_record.save!(validate: false) + model_record.reload + end + + it "returns an error string" do + upload = model_record + missing_model = "#{upload.model_type} with ID #{upload.model_id}" + expect(replicator.predownload_validation_failure).to eq( + "The model which owns Upload with ID #{upload.id} is missing: #{missing_model}" + ) + end + end + end end diff --git a/ee/spec/services/geo/blob_download_service_spec.rb b/ee/spec/services/geo/blob_download_service_spec.rb index fc63e943d1039d901c4926247267924356d3eeac..27e3a4e0f8242732a3e97f9a709923ce0af876c3 100644 --- a/ee/spec/services/geo/blob_download_service_spec.rb +++ b/ee/spec/services/geo/blob_download_service_spec.rb @@ -22,24 +22,23 @@ describe "#execute" do let(:downloader) { double(:downloader) } - before do - expect(downloader).to receive(:execute).and_return(result) - expect(::Gitlab::Geo::Replication::BlobDownloader).to receive(:new).and_return(downloader) - end - - context 'when exception is raised' do + context 'when the downloader result object contains an error' do let(:error) { StandardError.new('Error') } let(:result) do double( :result, success: false, - primary_missing_file: - false, + primary_missing_file: false, bytes_downloaded: 0, reason: 'foo', extra_details: { error: error }) end + before do + expect(downloader).to receive(:execute).and_return(result) + expect(::Gitlab::Geo::Replication::BlobDownloader).to receive(:new).and_return(downloader) + end + it 'tracks exception' do expect(Gitlab::ErrorTracking).to receive(:track_exception).with( error, @@ -70,7 +69,64 @@ end end + context 'when the replicator fails pre-download validation' do + before do + expect(replicator).to receive(:predownload_validation_failure).and_return( + "This upload is busted" + ) + end + + it "creates the registry" do + expect do + subject.execute + end.to change { registry_class.count }.by(1) + end + + it "sets sync state to failed" do + subject.execute + + expect(registry_class.last).to be_failed + end + + it "captures the error details in the registry record" do + subject.execute + expect(registry_class.last.last_sync_failure).to include("This upload is busted") + end + end + + context 'when exception is raised by the downloader' do + let(:error) { StandardError.new('Some data inconsistency') } + + before do + expect(downloader).to receive(:execute).and_raise(error) + expect(::Gitlab::Geo::Replication::BlobDownloader).to receive(:new).and_return(downloader) + end + + it 'marks the replicator registry record as failed' do + expect { subject.execute }.to raise_error(error) + + registry = replicator.registry + expect(registry).to be_failed + expect(registry.last_sync_failure).to include("Error while attempting to sync") + end + + it 'reports the exception' do + expect(Gitlab::ErrorTracking).to receive(:track_exception).with( + error, + replicable_name: replicator.replicable_name, + model_record_id: replicator.model_record_id + ) + + expect { subject.execute }.to raise_error(error) + end + end + context "when it can obtain the exclusive lease" do + before do + expect(downloader).to receive(:execute).and_return(result) + expect(::Gitlab::Geo::Replication::BlobDownloader).to receive(:new).and_return(downloader) + end + context "when the registry record does not exist" do context "when the downloader returns success" do let(:result) { double(:result, success: true, primary_missing_file: false, bytes_downloaded: 123, reason: nil, extra_details: nil) }