diff --git a/Gemfile.lock b/Gemfile.lock index 29cdca969fcb0553510221800ecefc4769b0615c..447c5d292d59bb889955d5f228cdc2f4b679f1e6 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -152,7 +152,7 @@ PATH PATH remote: vendor/gems/sidekiq-reliable-fetch specs: - gitlab-sidekiq-fetcher (0.9.0) + gitlab-sidekiq-fetcher (0.10.0) json (>= 2.5) sidekiq (~> 6.1) diff --git a/app/workers/concerns/gitlab/github_import/stage_methods.rb b/app/workers/concerns/gitlab/github_import/stage_methods.rb index 1414ff8d6bdc88128183d3f8d36c1c16acef742f..5c63c667a03ff6a02645c75d194be85d7b0769cd 100644 --- a/app/workers/concerns/gitlab/github_import/stage_methods.rb +++ b/app/workers/concerns/gitlab/github_import/stage_methods.rb @@ -5,6 +5,8 @@ module GithubImport module StageMethods extend ActiveSupport::Concern + MAX_RETRIES_AFTER_INTERRUPTION = 20 + included do include ApplicationWorker @@ -18,6 +20,29 @@ module StageMethods end end + class_methods do + # We can increase the number of times a GitHubImport::Stage worker is retried + # after being interrupted if the importer it executes can restart exactly + # from where it left off. + # + # It is not safe to call this method if the importer loops over its data from + # the beginning when restarted, even if it skips data that is already imported + # inside the loop, as there is a possibility the importer will never reach + # the end of the loop. + # + # Examples of stage workers that call this method are ones that execute services that: + # + # - Continue paging an endpoint from where it left off: + # https://gitlab.com/gitlab-org/gitlab/-/blob/487521cc/lib/gitlab/github_import/parallel_scheduling.rb#L114-117 + # - Continue their loop from where it left off: + # https://gitlab.com/gitlab-org/gitlab/-/blob/024235ec/lib/gitlab/github_import/importer/pull_requests/review_requests_importer.rb#L15 + def resumes_work_when_interrupted! + return unless Feature.enabled?(:github_importer_raise_max_interruptions) + + sidekiq_options max_retries_after_interruption: MAX_RETRIES_AFTER_INTERRUPTION + end + end + # project_id - The ID of the GitLab project to import the data into. def perform(project_id) info(project_id, message: 'starting stage') diff --git a/app/workers/gitlab/github_import/stage/import_attachments_worker.rb b/app/workers/gitlab/github_import/stage/import_attachments_worker.rb index f9952f04e998e981e37c74f4da00d09bf5710ca7..6f8190fc7e69d2ad09c8a803fe115cf9d375bd32 100644 --- a/app/workers/gitlab/github_import/stage/import_attachments_worker.rb +++ b/app/workers/gitlab/github_import/stage/import_attachments_worker.rb @@ -11,6 +11,8 @@ class ImportAttachmentsWorker # rubocop:disable Scalability/IdempotentWorker include GithubImport::Queue include StageMethods + resumes_work_when_interrupted! + # client - An instance of Gitlab::GithubImport::Client. # project - An instance of Project. def import(client, project) diff --git a/app/workers/gitlab/github_import/stage/import_issue_events_worker.rb b/app/workers/gitlab/github_import/stage/import_issue_events_worker.rb index c80412d941b89aa900949f422be80b842355fe2b..77d286dc466b9ed989ffc1d71db8a205f9de2ae1 100644 --- a/app/workers/gitlab/github_import/stage/import_issue_events_worker.rb +++ b/app/workers/gitlab/github_import/stage/import_issue_events_worker.rb @@ -11,6 +11,8 @@ class ImportIssueEventsWorker # rubocop:disable Scalability/IdempotentWorker include GithubImport::Queue include StageMethods + resumes_work_when_interrupted! + # client - An instance of Gitlab::GithubImport::Client. # project - An instance of Project. def import(client, project) diff --git a/app/workers/gitlab/github_import/stage/import_issues_and_diff_notes_worker.rb b/app/workers/gitlab/github_import/stage/import_issues_and_diff_notes_worker.rb index 592b789cc946dfb25cf4dce35281a923f7cce37a..e70f10e3ce9f3161a21258a34148b0a18f94f1d5 100644 --- a/app/workers/gitlab/github_import/stage/import_issues_and_diff_notes_worker.rb +++ b/app/workers/gitlab/github_import/stage/import_issues_and_diff_notes_worker.rb @@ -11,6 +11,8 @@ class ImportIssuesAndDiffNotesWorker # rubocop:disable Scalability/IdempotentWor include GithubImport::Queue include StageMethods + resumes_work_when_interrupted! + # client - An instance of Gitlab::GithubImport::Client. # project - An instance of Project. def import(client, project) diff --git a/app/workers/gitlab/github_import/stage/import_lfs_objects_worker.rb b/app/workers/gitlab/github_import/stage/import_lfs_objects_worker.rb index e89a850c991bb722681f1790c59de321963b1312..9db72de59b751a2dec16b075006ea33e9980c644 100644 --- a/app/workers/gitlab/github_import/stage/import_lfs_objects_worker.rb +++ b/app/workers/gitlab/github_import/stage/import_lfs_objects_worker.rb @@ -11,6 +11,11 @@ class ImportLfsObjectsWorker # rubocop:disable Scalability/IdempotentWorker include GithubImport::Queue include StageMethods + # Importer::LfsObjectsImporter can resume work when interrupted as + # it uses Projects::LfsPointers::LfsObjectDownloadListService which excludes LFS objects that already exist. + # https://gitlab.com/gitlab-org/gitlab/-/blob/eabf0800/app/services/projects/lfs_pointers/lfs_object_download_list_service.rb#L69-71 + resumes_work_when_interrupted! + def perform(project_id) return unless (project = find_project(project_id)) diff --git a/app/workers/gitlab/github_import/stage/import_notes_worker.rb b/app/workers/gitlab/github_import/stage/import_notes_worker.rb index c1fdb76d03eb2defa467f9d0979c2922e6d0c2e6..8e88034ba15d2000e9fe296b2993d3b9eed2fcf8 100644 --- a/app/workers/gitlab/github_import/stage/import_notes_worker.rb +++ b/app/workers/gitlab/github_import/stage/import_notes_worker.rb @@ -11,6 +11,8 @@ class ImportNotesWorker # rubocop:disable Scalability/IdempotentWorker include GithubImport::Queue include StageMethods + resumes_work_when_interrupted! + # client - An instance of Gitlab::GithubImport::Client. # project - An instance of Project. def import(client, project) diff --git a/app/workers/gitlab/github_import/stage/import_pull_requests_merged_by_worker.rb b/app/workers/gitlab/github_import/stage/import_pull_requests_merged_by_worker.rb index 889f938318f2e6e7a249fe4e2971324e253e6531..376581c633f889bb53bc1b3ef4fb8afdee5c9df8 100644 --- a/app/workers/gitlab/github_import/stage/import_pull_requests_merged_by_worker.rb +++ b/app/workers/gitlab/github_import/stage/import_pull_requests_merged_by_worker.rb @@ -11,6 +11,8 @@ class ImportPullRequestsMergedByWorker # rubocop:disable Scalability/IdempotentW include GithubImport::Queue include StageMethods + resumes_work_when_interrupted! + # client - An instance of Gitlab::GithubImport::Client. # project - An instance of Project. def import(client, project) diff --git a/app/workers/gitlab/github_import/stage/import_pull_requests_review_requests_worker.rb b/app/workers/gitlab/github_import/stage/import_pull_requests_review_requests_worker.rb index 44cd7cdb9d152f719bcc1557e56dad15c07fd378..f2907006d9cddc9e8d229d9049580b73942ed3c0 100644 --- a/app/workers/gitlab/github_import/stage/import_pull_requests_review_requests_worker.rb +++ b/app/workers/gitlab/github_import/stage/import_pull_requests_review_requests_worker.rb @@ -11,6 +11,8 @@ class ImportPullRequestsReviewRequestsWorker # rubocop:disable Scalability/Idemp include GithubImport::Queue include StageMethods + resumes_work_when_interrupted! + # client - An instance of Gitlab::GithubImport::Client. # project - An instance of Project. def import(client, project) diff --git a/app/workers/gitlab/github_import/stage/import_pull_requests_reviews_worker.rb b/app/workers/gitlab/github_import/stage/import_pull_requests_reviews_worker.rb index 9947a89b92ce9fab9688188fb1f8be8f88ad07d5..5c516555387e62ac0411322f13c92f1199362b37 100644 --- a/app/workers/gitlab/github_import/stage/import_pull_requests_reviews_worker.rb +++ b/app/workers/gitlab/github_import/stage/import_pull_requests_reviews_worker.rb @@ -11,6 +11,8 @@ class ImportPullRequestsReviewsWorker # rubocop:disable Scalability/IdempotentWo include GithubImport::Queue include StageMethods + resumes_work_when_interrupted! + # client - An instance of Gitlab::GithubImport::Client. # project - An instance of Project. def import(client, project) diff --git a/app/workers/gitlab/github_import/stage/import_pull_requests_worker.rb b/app/workers/gitlab/github_import/stage/import_pull_requests_worker.rb index 9bdf3a317764759eaedf9b31063f37dbdc2ed931..50527dfa2d84cea2ca221b7c8092e63483eb3a78 100644 --- a/app/workers/gitlab/github_import/stage/import_pull_requests_worker.rb +++ b/app/workers/gitlab/github_import/stage/import_pull_requests_worker.rb @@ -11,6 +11,8 @@ class ImportPullRequestsWorker # rubocop:disable Scalability/IdempotentWorker include GithubImport::Queue include StageMethods + resumes_work_when_interrupted! + # client - An instance of Gitlab::GithubImport::Client. # project - An instance of Project. def import(client, project) diff --git a/config/feature_flags/development/github_importer_raise_max_interruptions.yml b/config/feature_flags/development/github_importer_raise_max_interruptions.yml new file mode 100644 index 0000000000000000000000000000000000000000..3cbcc10865fd4a02f3617f3e77bb3cfdac0444c8 --- /dev/null +++ b/config/feature_flags/development/github_importer_raise_max_interruptions.yml @@ -0,0 +1,8 @@ +--- +name: github_importer_raise_max_interruptions +introduced_by_url: https://gitlab.com/gitlab-org/gitlab/-/merge_requests/134949 +rollout_issue_url: https://gitlab.com/gitlab-org/gitlab/-/issues/429306 +milestone: '16.6' +type: development +group: group::import and integrate +default_enabled: false diff --git a/spec/workers/concerns/gitlab/github_import/stage_methods_spec.rb b/spec/workers/concerns/gitlab/github_import/stage_methods_spec.rb index 898606845a2985180fad239b325021287b1c1e7c..fa782967441b7070d9896752ac41450aff7ed23b 100644 --- a/spec/workers/concerns/gitlab/github_import/stage_methods_spec.rb +++ b/spec/workers/concerns/gitlab/github_import/stage_methods_spec.rb @@ -185,4 +185,30 @@ def self.name expect(worker.find_project(-1)).to be_nil end end + + describe '.resumes_work_when_interrupted!' do + subject(:sidekiq_options) { worker.class.sidekiq_options } + + it 'does not set the `max_retries_after_interruption` if not called' do + is_expected.not_to have_key('max_retries_after_interruption') + end + + it 'sets the `max_retries_after_interruption`' do + worker.class.resumes_work_when_interrupted! + + is_expected.to include('max_retries_after_interruption' => 20) + end + + context 'when the flag is disabled' do + before do + stub_feature_flags(github_importer_raise_max_interruptions: false) + end + + it 'does not set `max_retries_after_interruption`' do + worker.class.resumes_work_when_interrupted! + + is_expected.not_to have_key('max_retries_after_interruption') + end + end + end end diff --git a/vendor/gems/sidekiq-reliable-fetch/Gemfile.lock b/vendor/gems/sidekiq-reliable-fetch/Gemfile.lock index 57767ee8c3bfb10cf3e2139acdf15947b30bdb9f..aeb163db018b28a3f78a90168b89e55bf7d77884 100644 --- a/vendor/gems/sidekiq-reliable-fetch/Gemfile.lock +++ b/vendor/gems/sidekiq-reliable-fetch/Gemfile.lock @@ -1,7 +1,7 @@ PATH remote: . specs: - gitlab-sidekiq-fetcher (0.9.0) + gitlab-sidekiq-fetcher (0.10.0) json (>= 2.5) sidekiq (~> 6.1) diff --git a/vendor/gems/sidekiq-reliable-fetch/gitlab-sidekiq-fetcher.gemspec b/vendor/gems/sidekiq-reliable-fetch/gitlab-sidekiq-fetcher.gemspec index 0d0e5e3f6fa198807fc7d2ee1aa0beea11cc5640..b656267003aa454e576b5ab09f23db0421f1c3d6 100644 --- a/vendor/gems/sidekiq-reliable-fetch/gitlab-sidekiq-fetcher.gemspec +++ b/vendor/gems/sidekiq-reliable-fetch/gitlab-sidekiq-fetcher.gemspec @@ -1,6 +1,6 @@ Gem::Specification.new do |s| s.name = 'gitlab-sidekiq-fetcher' - s.version = '0.9.0' + s.version = '0.10.0' s.authors = ['TEA', 'GitLab'] s.email = 'valery@gitlab.com' s.license = 'LGPL-3.0' diff --git a/vendor/gems/sidekiq-reliable-fetch/lib/sidekiq/base_reliable_fetch.rb b/vendor/gems/sidekiq-reliable-fetch/lib/sidekiq/base_reliable_fetch.rb index 39b98a0109ff11af1e2a5174dc71a53eab889c0e..006aad87abefb98efc22312d3c1066bf8155dd14 100644 --- a/vendor/gems/sidekiq-reliable-fetch/lib/sidekiq/base_reliable_fetch.rb +++ b/vendor/gems/sidekiq-reliable-fetch/lib/sidekiq/base_reliable_fetch.rb @@ -230,7 +230,7 @@ def max_retries_after_interruption(worker_class) max_retries_after_interruption = nil max_retries_after_interruption ||= begin - Object.const_get(worker_class).sidekiq_options[:max_retries_after_interruption] + Object.const_get(worker_class).sidekiq_options['max_retries_after_interruption'] rescue NameError end diff --git a/vendor/gems/sidekiq-reliable-fetch/spec/base_reliable_fetch_spec.rb b/vendor/gems/sidekiq-reliable-fetch/spec/base_reliable_fetch_spec.rb index cdc4409f0d566f8c4f4eabdab47f467d2fb981ab..32e62925aafd367b12894c88fa78785472540b30 100644 --- a/vendor/gems/sidekiq-reliable-fetch/spec/base_reliable_fetch_spec.rb +++ b/vendor/gems/sidekiq-reliable-fetch/spec/base_reliable_fetch_spec.rb @@ -76,6 +76,19 @@ expect(queue2.size).to eq 1 expect(Sidekiq::InterruptedSet.new.size).to eq 0 end + + it 'does not put jobs into interrupted queue if it is disabled on the worker' do + stub_const('Bob', double(sidekiq_options: { 'max_retries_after_interruption' => -1 })) + + uow = described_class::UnitOfWork + interrupted_job = Sidekiq.dump_json(class: 'Bob', args: [1, 2, 'foo'], interrupted_count: 3) + jobs = [ uow.new('queue:foo', interrupted_job), uow.new('queue:foo', job), uow.new('queue:bar', job) ] + described_class.new(options).bulk_requeue(jobs, nil) + + expect(queue1.size).to eq 2 + expect(queue2.size).to eq 1 + expect(Sidekiq::InterruptedSet.new.size).to eq 0 + end end it 'sets heartbeat' do