diff --git a/app/services/projects/lfs_pointers/lfs_download_link_list_service.rb b/app/services/projects/lfs_pointers/lfs_download_link_list_service.rb index c91103f897f9d363da7b39c243fe2d69173a8d92..f7de7f98768788825cda03b63bb90bc12b2bb34b 100644 --- a/app/services/projects/lfs_pointers/lfs_download_link_list_service.rb +++ b/app/services/projects/lfs_pointers/lfs_download_link_list_service.rb @@ -1,6 +1,6 @@ # frozen_string_literal: true -# This service lists the download link from a remote source based on the +# This service yields operation on each download link from a remote source based on the # oids provided module Projects module LfsPointers @@ -23,29 +23,22 @@ def initialize(project, remote_uri: nil) @remote_uri = remote_uri end - # This method accepts two parameters: # - oids: hash of oids to query. The structure is { lfs_file_oid => lfs_file_size } - # - # Returns an array of LfsDownloadObject - def execute(oids) - return [] unless project&.lfs_enabled? && remote_uri && oids.present? + # Yields operation for each link batch-by-batch + def each_link(oids, &block) + return unless project&.lfs_enabled? && remote_uri && oids.present? - get_download_links_in_batches(oids) + download_links_in_batches(oids, &block) end private - def get_download_links_in_batches(oids, batch_size = REQUEST_BATCH_SIZE) - download_links = [] - + def download_links_in_batches(oids, batch_size = REQUEST_BATCH_SIZE, &block) oids.each_slice(batch_size) do |batch| - download_links += get_download_links(batch) + download_links_for(batch).each(&block) end - - download_links - rescue DownloadLinksRequestEntityTooLargeError => e - # Log this exceptions to see how open it happens + # Log this exceptions to see how often it happens Gitlab::ErrorTracking .track_exception(e, project_id: project&.id, batch_size: batch_size, oids_count: oids.count) @@ -57,7 +50,7 @@ def get_download_links_in_batches(oids, batch_size = REQUEST_BATCH_SIZE) raise DownloadLinksError, 'Unable to download due to RequestEntityTooLarge errors' end - def get_download_links(oids) + def download_links_for(oids) response = Gitlab::HTTP.post(remote_uri, body: request_body(oids), headers: headers) diff --git a/app/services/projects/lfs_pointers/lfs_download_service.rb b/app/services/projects/lfs_pointers/lfs_download_service.rb index eaf73b78c1c49b7fbbd71d9255d8896ccec93527..26352198e5c5e391f83a25ef1bf6f31a323ba288 100644 --- a/app/services/projects/lfs_pointers/lfs_download_service.rb +++ b/app/services/projects/lfs_pointers/lfs_download_service.rb @@ -92,9 +92,15 @@ def download_options end def fetch_file(&block) + attempts ||= 1 response = Gitlab::HTTP.get(lfs_sanitized_url, download_options, &block) raise ResponseError, "Received error code #{response.code}" unless response.success? + rescue Net::OpenTimeout + raise if attempts >= 3 + + attempts += 1 + retry end def with_tmp_file diff --git a/app/services/projects/lfs_pointers/lfs_import_service.rb b/app/services/projects/lfs_pointers/lfs_import_service.rb index 3fc82f2c410fca782d6c1f7944d771e572578863..c97910410882b8376be75aa573d1fdb0137d98c3 100644 --- a/app/services/projects/lfs_pointers/lfs_import_service.rb +++ b/app/services/projects/lfs_pointers/lfs_import_service.rb @@ -9,9 +9,7 @@ class LfsImportService < BaseService def execute return success unless project&.lfs_enabled? - lfs_objects_to_download = LfsObjectDownloadListService.new(project).execute - - lfs_objects_to_download.each do |lfs_download_object| + LfsObjectDownloadListService.new(project).each_list_item do |lfs_download_object| LfsDownloadService.new(project, lfs_download_object).execute end diff --git a/app/services/projects/lfs_pointers/lfs_object_download_list_service.rb b/app/services/projects/lfs_pointers/lfs_object_download_list_service.rb index bd7598788a639aa45e60602c2aad271099d909c6..09fec9939b9cc970dd9851bc6e3016b1d31ae5fc 100644 --- a/app/services/projects/lfs_pointers/lfs_object_download_list_service.rb +++ b/app/services/projects/lfs_pointers/lfs_object_download_list_service.rb @@ -1,8 +1,8 @@ # frozen_string_literal: true -# This service manages the whole worflow of discovering the Lfs files in a -# repository, linking them to the project and downloading (and linking) the non -# existent ones. +# This service discovers the Lfs files that are linked in repository, +# but not downloaded yet and yields the operation +# on each Lfs file link (url) to remote repository. module Projects module LfsPointers class LfsObjectDownloadListService < BaseService @@ -14,30 +14,31 @@ class LfsObjectDownloadListService < BaseService LfsObjectDownloadListError = Class.new(StandardError) - def execute - return [] unless project&.lfs_enabled? - - if external_lfs_endpoint? - # If the endpoint host is different from the import_url it means - # that the repo is using a third party service for storing the LFS files. - # In this case, we have to disable lfs in the project - disable_lfs! - - return [] - end + def each_list_item(&block) + return unless context_valid? # Downloading the required information and gathering it inside an # LfsDownloadObject for each oid - # LfsDownloadLinkListService .new(project, remote_uri: current_endpoint_uri) - .execute(missing_lfs_files) + .each_link(missing_lfs_files, &block) rescue LfsDownloadLinkListService::DownloadLinksError => e raise LfsObjectDownloadListError, "The LFS objects download list couldn't be imported. Error: #{e.message}" end private + def context_valid? + return false unless project&.lfs_enabled? + return true unless external_lfs_endpoint? + + # If the endpoint host is different from the import_url it means + # that the repo is using a third party service for storing the LFS files. + # In this case, we have to disable lfs in the project + disable_lfs! + false + end + def external_lfs_endpoint? lfsconfig_endpoint_uri && lfsconfig_endpoint_uri.host != import_uri.host end diff --git a/ee/spec/services/projects/update_mirror_service_spec.rb b/ee/spec/services/projects/update_mirror_service_spec.rb index 5a71a229681437ecd8096badd5ee0f6ad300bc41..c4edd6bb04a1d494fe7f4d422f1e0caf5389bfdf 100644 --- a/ee/spec/services/projects/update_mirror_service_spec.rb +++ b/ee/spec/services/projects/update_mirror_service_spec.rb @@ -466,7 +466,7 @@ def create_file(repository) it 'updates LFS objects' do expect(Projects::LfsPointers::LfsImportService).to receive(:new).and_call_original expect_next_instance_of(Projects::LfsPointers::LfsObjectDownloadListService) do |instance| - expect(instance).to receive(:execute).and_return({}) + expect(instance).to receive(:each_list_item) end service.execute diff --git a/lib/gitlab/github_import/importer/lfs_objects_importer.rb b/lib/gitlab/github_import/importer/lfs_objects_importer.rb index 775afd5f53a3b308cd6082b5b5dfc787bed580d4..d064278e4a074fb41124282a0e4db60f6cd1f810 100644 --- a/lib/gitlab/github_import/importer/lfs_objects_importer.rb +++ b/lib/gitlab/github_import/importer/lfs_objects_importer.rb @@ -27,9 +27,9 @@ def collection_method end def each_object_to_import - lfs_objects = Projects::LfsPointers::LfsObjectDownloadListService.new(project).execute + download_service = Projects::LfsPointers::LfsObjectDownloadListService.new(project) - lfs_objects.each do |object| + download_service.each_list_item do |object| Gitlab::GithubImport::ObjectCounter.increment(project, object_type, :fetched) yield object diff --git a/spec/lib/gitlab/github_import/importer/lfs_objects_importer_spec.rb b/spec/lib/gitlab/github_import/importer/lfs_objects_importer_spec.rb index 99536588718cbfebafb6e7d351d6ad5dfa89246c..678aa705b6c4bbf6437305d0e1f85d67a31b52f8 100644 --- a/spec/lib/gitlab/github_import/importer/lfs_objects_importer_spec.rb +++ b/spec/lib/gitlab/github_import/importer/lfs_objects_importer_spec.rb @@ -58,7 +58,7 @@ expect_next_instance_of(Projects::LfsPointers::LfsObjectDownloadListService) do |service| expect(service) - .to receive(:execute) + .to receive(:each_list_item) .and_raise(exception) end @@ -79,7 +79,7 @@ expect_next_instance_of(Projects::LfsPointers::LfsObjectDownloadListService) do |service| expect(service) - .to receive(:execute) + .to receive(:each_list_item) .and_raise(exception) end @@ -94,7 +94,7 @@ lfs_object_importer = double(:lfs_object_importer) expect_next_instance_of(Projects::LfsPointers::LfsObjectDownloadListService) do |service| - expect(service).to receive(:execute).and_return([lfs_download_object]) + expect(service).to receive(:each_list_item).and_yield(lfs_download_object) end expect(Gitlab::GithubImport::Importer::LfsObjectImporter) @@ -115,7 +115,7 @@ importer = described_class.new(project, client) expect_next_instance_of(Projects::LfsPointers::LfsObjectDownloadListService) do |service| - expect(service).to receive(:execute).and_return([lfs_download_object]) + expect(service).to receive(:each_list_item).and_yield(lfs_download_object) end expect(Gitlab::GithubImport::ImportLfsObjectWorker).to receive(:bulk_perform_in) diff --git a/spec/services/projects/lfs_pointers/lfs_download_link_list_service_spec.rb b/spec/services/projects/lfs_pointers/lfs_download_link_list_service_spec.rb index d472d6493c345c448ba4a99891bde9e8872a001c..80b3c4d0403b27507d0a24dce2b4211fcd698574 100644 --- a/spec/services/projects/lfs_pointers/lfs_download_link_list_service_spec.rb +++ b/spec/services/projects/lfs_pointers/lfs_download_link_list_service_spec.rb @@ -39,7 +39,7 @@ def custom_response(net_response, body = nil) ] end - subject { described_class.new(project, remote_uri: remote_uri) } + subject(:service) { described_class.new(project, remote_uri: remote_uri) } before do allow(project).to receive(:lfs_enabled?).and_return(true) @@ -48,19 +48,24 @@ def custom_response(net_response, body = nil) allow(Gitlab::HTTP).to receive(:post).and_return(response) end - describe '#execute' do - let(:download_objects) { subject.execute(new_oids) } - + describe '#each_link' do it 'retrieves each download link of every non existent lfs object' do - download_objects.each do |lfs_download_object| - expect(lfs_download_object.link).to eq "#{import_url}/gitlab-lfs/objects/#{lfs_download_object.oid}" - end + links = [] + service.each_link(new_oids) { |lfs_download_object| links << lfs_download_object.link } + + expect(links).to contain_exactly( + "#{import_url}/gitlab-lfs/objects/oid1", + "#{import_url}/gitlab-lfs/objects/oid2" + ) end it 'stores headers' do - download_objects.each do |lfs_download_object| - expect(lfs_download_object.headers).to eq(headers) + expected_headers = [] + service.each_link(new_oids) do |lfs_download_object| + expected_headers << lfs_download_object.headers end + + expect(expected_headers).to contain_exactly(headers, headers) end context 'when lfs objects size is larger than the batch size' do @@ -97,10 +102,13 @@ def stub_request(batch, response) stub_successful_request([data[4]]) end - it 'retreives them in batches' do - subject.execute(new_oids).each do |lfs_download_object| + it 'retrieves them in batches' do + checksum = 0 + service.each_link(new_oids) do |lfs_download_object| expect(lfs_download_object.link).to eq "#{import_url}/gitlab-lfs/objects/#{lfs_download_object.oid}" + checksum += 1 end + expect(checksum).to eq new_oids.size end end @@ -127,9 +135,12 @@ def stub_request(batch, response) an_instance_of(error_class), project_id: project.id, batch_size: 5, oids_count: 5 ) - subject.execute(new_oids).each do |lfs_download_object| + checksum = 0 + service.each_link(new_oids) do |lfs_download_object| expect(lfs_download_object.link).to eq "#{import_url}/gitlab-lfs/objects/#{lfs_download_object.oid}" + checksum += 1 end + expect(checksum).to eq new_oids.size end end @@ -153,7 +164,7 @@ def stub_request(batch, response) expect(Gitlab::ErrorTracking).to receive(:track_exception).with( an_instance_of(error_class), project_id: project.id, batch_size: 2, oids_count: 5 ) - expect { subject.execute(new_oids) }.to raise_error(described_class::DownloadLinksError) + expect { service.each_link(new_oids) }.to raise_error(described_class::DownloadLinksError) end end end @@ -165,21 +176,23 @@ def stub_request(batch, response) let(:import_url) { 'http://user:password@www.gitlab.com/demo/repo.git' } it 'adds credentials to the download_link' do - result = subject.execute(new_oids) - - result.each do |lfs_download_object| + checksum = 0 + service.each_link(new_oids) do |lfs_download_object| expect(lfs_download_object.link.starts_with?('http://user:password@')).to be_truthy + checksum += 1 end + expect(checksum).to eq new_oids.size end end context 'when lfs_endpoint does not have any credentials' do it 'does not add any credentials' do - result = subject.execute(new_oids) - - result.each do |lfs_download_object| + checksum = 0 + service.each_link(new_oids) do |lfs_download_object| expect(lfs_download_object.link.starts_with?('http://user:password@')).to be_falsey + checksum += 1 end + expect(checksum).to eq new_oids.size end end end @@ -189,17 +202,18 @@ def stub_request(batch, response) let(:lfs_endpoint) { "#{import_url_with_credentials}/info/lfs/objects/batch" } it 'downloads without any credentials' do - result = subject.execute(new_oids) - - result.each do |lfs_download_object| + checksum = 0 + service.each_link(new_oids) do |lfs_download_object| expect(lfs_download_object.link.starts_with?('http://user:password@')).to be_falsey + checksum += 1 end + expect(checksum).to eq new_oids.size end end end end - describe '#get_download_links' do + describe '#download_links_for' do context 'if request fails' do before do request_timeout_net_response = Net::HTTPRequestTimeout.new('', '', '') @@ -208,7 +222,7 @@ def stub_request(batch, response) end it 'raises an error' do - expect { subject.send(:get_download_links, new_oids) }.to raise_error(described_class::DownloadLinksError) + expect { subject.send(:download_links_for, new_oids) }.to raise_error(described_class::DownloadLinksError) end end @@ -218,7 +232,7 @@ def stub_request(batch, response) allow(response).to receive(:body).and_return(body) allow(Gitlab::HTTP).to receive(:post).and_return(response) - expect { subject.send(:get_download_links, new_oids) }.to raise_error(described_class::DownloadLinksError) + expect { subject.send(:download_links_for, new_oids) }.to raise_error(described_class::DownloadLinksError) end end diff --git a/spec/services/projects/lfs_pointers/lfs_download_service_spec.rb b/spec/services/projects/lfs_pointers/lfs_download_service_spec.rb index 6c7164c5e06272ffd8618a971a628cd9014d3f98..c815ad3884317020f0abad479b84ec263ba456df 100644 --- a/spec/services/projects/lfs_pointers/lfs_download_service_spec.rb +++ b/spec/services/projects/lfs_pointers/lfs_download_service_spec.rb @@ -108,7 +108,7 @@ end end - context 'when file download fails' do + context 'when file downloading response code is not success' do before do allow(Gitlab::HTTP).to receive(:get).and_return(code: 500, 'success?' => false) end @@ -122,6 +122,20 @@ end end + context 'when file downloading request timeout few times' do + before do + allow(Gitlab::HTTP).to receive(:get).and_raise(Net::OpenTimeout) + end + + it_behaves_like 'no lfs object is created' + + it 'retries to get LFS object 3 times before raising exception' do + subject.execute + + expect(Gitlab::HTTP).to have_received(:get).exactly(3).times + end + end + context 'when file download returns a redirect' do let(:redirect_link) { 'http://external-link' } diff --git a/spec/services/projects/lfs_pointers/lfs_import_service_spec.rb b/spec/services/projects/lfs_pointers/lfs_import_service_spec.rb index b36b0b8d6b28f929092dff49322ae35801981e41..32b86ade81ea456ecb5c4c7a7123865b9492fb5b 100644 --- a/spec/services/projects/lfs_pointers/lfs_import_service_spec.rb +++ b/spec/services/projects/lfs_pointers/lfs_import_service_spec.rb @@ -5,7 +5,12 @@ let(:project) { create(:project) } let(:user) { project.creator } let(:import_url) { 'http://www.gitlab.com/demo/repo.git' } - let(:oid_download_links) { { 'oid1' => "#{import_url}/gitlab-lfs/objects/oid1", 'oid2' => "#{import_url}/gitlab-lfs/objects/oid2" } } + let(:oid_download_links) do + [ + { 'oid1' => "#{import_url}/gitlab-lfs/objects/oid1" }, + { 'oid2' => "#{import_url}/gitlab-lfs/objects/oid2" } + ] + end subject { described_class.new(project, user) } @@ -17,7 +22,8 @@ it 'downloads lfs objects' do service = double expect_next_instance_of(Projects::LfsPointers::LfsObjectDownloadListService) do |instance| - expect(instance).to receive(:execute).and_return(oid_download_links) + expect(instance).to receive(:each_list_item) + .and_yield(oid_download_links[0]).and_yield(oid_download_links[1]) end expect(Projects::LfsPointers::LfsDownloadService).to receive(:new).and_return(service).twice expect(service).to receive(:execute).twice @@ -30,7 +36,7 @@ context 'when no downloadable lfs object links' do it 'does not call LfsDownloadService' do expect_next_instance_of(Projects::LfsPointers::LfsObjectDownloadListService) do |instance| - expect(instance).to receive(:execute).and_return({}) + expect(instance).to receive(:each_list_item) end expect(Projects::LfsPointers::LfsDownloadService).not_to receive(:new) @@ -44,7 +50,7 @@ it 'returns error' do error_message = "error message" expect_next_instance_of(Projects::LfsPointers::LfsObjectDownloadListService) do |instance| - expect(instance).to receive(:execute).and_raise(StandardError, error_message) + expect(instance).to receive(:each_list_item).and_raise(StandardError, error_message) end result = subject.execute diff --git a/spec/services/projects/lfs_pointers/lfs_object_download_list_service_spec.rb b/spec/services/projects/lfs_pointers/lfs_object_download_list_service_spec.rb index adcc2b857069b03f47373d28a90e1c2dccad3d8d..59eb1ed7a2915ab7449789f272983ffcc3e8240c 100644 --- a/spec/services/projects/lfs_pointers/lfs_object_download_list_service_spec.rb +++ b/spec/services/projects/lfs_pointers/lfs_object_download_list_service_spec.rb @@ -9,7 +9,13 @@ let!(:lfs_objects_project) { create_list(:lfs_objects_project, 2, project: project) } let!(:existing_lfs_objects) { LfsObject.pluck(:oid, :size).to_h } let(:oids) { { 'oid1' => 123, 'oid2' => 125 } } - let(:oid_download_links) { { 'oid1' => "#{import_url}/gitlab-lfs/objects/oid1", 'oid2' => "#{import_url}/gitlab-lfs/objects/oid2" } } + let(:oid_download_links) do + [ + { 'oid1' => "#{import_url}/gitlab-lfs/objects/oid1" }, + { 'oid2' => "#{import_url}/gitlab-lfs/objects/oid2" } + ] + end + let(:all_oids) { existing_lfs_objects.merge(oids) } let(:remote_uri) { URI.parse(lfs_endpoint) } @@ -21,17 +27,24 @@ allow_any_instance_of(Projects::LfsPointers::LfsListService).to receive(:execute).and_return(all_oids) end - describe '#execute' do + describe '#each_list_item' do context 'when no lfs pointer is linked' do before do - allow_any_instance_of(Projects::LfsPointers::LfsDownloadLinkListService).to receive(:execute).and_return(oid_download_links) - expect(Projects::LfsPointers::LfsDownloadLinkListService).to receive(:new).with(project, remote_uri: URI.parse(default_endpoint)).and_call_original + allow_any_instance_of(Projects::LfsPointers::LfsDownloadLinkListService) + .to receive(:each_link).with(oids) + .and_yield(oid_download_links[0]) + .and_yield(oid_download_links[1]) end it 'retrieves all lfs pointers in the project repository' do + expect(Projects::LfsPointers::LfsDownloadLinkListService) + .to receive(:new).with(project, remote_uri: URI.parse(default_endpoint)) + .and_call_original expect_any_instance_of(Projects::LfsPointers::LfsListService).to receive(:execute) - subject.execute + checksum = 0 + subject.each_list_item { |lfs_object| checksum += 1 } + expect(checksum).to eq 2 end context 'when no LFS objects exist' do @@ -40,17 +53,23 @@ end it 'retrieves all LFS objects' do - expect_any_instance_of(Projects::LfsPointers::LfsDownloadLinkListService).to receive(:execute).with(all_oids) + expect(Projects::LfsPointers::LfsDownloadLinkListService) + .to receive(:new).with(project, remote_uri: URI.parse(default_endpoint)).and_call_original + expect_any_instance_of(Projects::LfsPointers::LfsDownloadLinkListService) + .to receive(:each_link).with(all_oids) - subject.execute + subject.each_list_item {} end end context 'when some LFS objects already exist' do it 'retrieves the download links of non-existent objects' do - expect_any_instance_of(Projects::LfsPointers::LfsDownloadLinkListService).to receive(:execute).with(oids) + expect_any_instance_of(Projects::LfsPointers::LfsDownloadLinkListService) + .to receive(:each_link).with(oids) - subject.execute + checksum = 0 + subject.each_list_item { |lfs_object| checksum += 1 } + expect(checksum).to eq 2 end end end @@ -62,16 +81,15 @@ context 'when url points to the same import url host' do let(:lfs_endpoint) { "#{import_url}/different_endpoint" } - let(:service) { double } - - before do - allow(service).to receive(:execute) - end + let(:service) { instance_double(Projects::LfsPointers::LfsDownloadLinkListService, each_link: nil) } it 'downloads lfs object using the new endpoint' do - expect(Projects::LfsPointers::LfsDownloadLinkListService).to receive(:new).with(project, remote_uri: remote_uri).and_return(service) + expect(Projects::LfsPointers::LfsDownloadLinkListService) + .to receive(:new) + .with(project, remote_uri: remote_uri) + .and_return(service) - subject.execute + subject.each_list_item {} end context 'when import url has credentials' do @@ -79,10 +97,14 @@ it 'adds the credentials to the new endpoint' do expect(Projects::LfsPointers::LfsDownloadLinkListService) - .to receive(:new).with(project, remote_uri: URI.parse("http://user:password@www.gitlab.com/demo/repo.git/different_endpoint")) + .to receive(:new) + .with( + project, + remote_uri: URI.parse("http://user:password@www.gitlab.com/demo/repo.git/different_endpoint") + ) .and_return(service) - subject.execute + subject.each_list_item {} end context 'when url has its own credentials' do @@ -93,7 +115,7 @@ .to receive(:new).with(project, remote_uri: remote_uri) .and_return(service) - subject.execute + subject.each_list_item {} end end end @@ -105,7 +127,7 @@ it 'disables lfs from the project' do expect(project.lfs_enabled?).to be_truthy - subject.execute + subject.each_list_item {} expect(project.lfs_enabled?).to be_falsey end @@ -113,7 +135,7 @@ it 'does not download anything' do expect_any_instance_of(Projects::LfsPointers::LfsListService).not_to receive(:execute) - subject.execute + subject.each_list_item {} end end end