From e4a7ff58142e84458a3f18e838de333c2ab9891c Mon Sep 17 00:00:00 2001 From: George Koltsov <gkoltsov@gitlab.com> Date: Fri, 28 Feb 2025 21:07:58 +0000 Subject: [PATCH] Move Import file removal to a cron job Update file-based Project/Group Import to remove import_file used to do the import to a cron job and not during the import process to allow future restarts of the process. Changelog: other --- app/models/import_export_upload.rb | 2 + .../groups/import_export/import_service.rb | 2 +- .../import/import_file_cleanup_service.rb | 19 +++++++ app/workers/all_queues.yml | 20 +++++++ .../import/import_file_cleanup_worker.rb | 18 +++++++ .../import/remove_import_file_worker.rb | 34 ++++++++++++ app/workers/repository_import_worker.rb | 14 +++++ config/initializers/1_settings.rb | 3 ++ config/sidekiq_queues.yml | 2 + ...port_export_uploads_on_update_at_and_id.rb | 16 ++++++ db/schema_migrations/20250227104609 | 1 + db/structure.sql | 2 + lib/gitlab/import_export/importer.rb | 3 +- .../lib/gitlab/import_export/importer_spec.rb | 2 +- spec/models/import_export_upload_spec.rb | 10 +++- .../import_file_cleanup_service_spec.rb | 35 +++++++++++++ .../import/import_file_cleanup_worker_spec.rb | 19 +++++++ .../import/remove_import_file_worker_spec.rb | 52 +++++++++++++++++++ spec/workers/repository_import_worker_spec.rb | 40 ++++++++++---- 19 files changed, 278 insertions(+), 16 deletions(-) create mode 100644 app/services/import/import_file_cleanup_service.rb create mode 100644 app/workers/gitlab/import/import_file_cleanup_worker.rb create mode 100644 app/workers/gitlab/import/remove_import_file_worker.rb create mode 100644 db/migrate/20250227104609_add_partial_index_to_import_export_uploads_on_update_at_and_id.rb create mode 100644 db/schema_migrations/20250227104609 create mode 100644 spec/services/import/import_file_cleanup_service_spec.rb create mode 100644 spec/workers/gitlab/import/import_file_cleanup_worker_spec.rb create mode 100644 spec/workers/gitlab/import/remove_import_file_worker_spec.rb diff --git a/app/models/import_export_upload.rb b/app/models/import_export_upload.rb index ce4c1e1b91520..654f3935c0162 100644 --- a/app/models/import_export_upload.rb +++ b/app/models/import_export_upload.rb @@ -1,6 +1,7 @@ # frozen_string_literal: true class ImportExportUpload < ApplicationRecord + include EachBatch include WithUploads belongs_to :project @@ -21,6 +22,7 @@ class ImportExportUpload < ApplicationRecord scope :updated_before, ->(date) { where('updated_at < ?', date) } scope :with_export_file, -> { where.not(export_file: nil) } + scope :with_import_file, -> { where.not(import_file: nil) } def retrieve_upload(_identifier, paths) Upload.find_by(model: self, path: paths) diff --git a/app/services/groups/import_export/import_service.rb b/app/services/groups/import_export/import_service.rb index 1969c29121f70..7dd3b06c04479 100644 --- a/app/services/groups/import_export/import_service.rb +++ b/app/services/groups/import_export/import_service.rb @@ -30,6 +30,7 @@ def execute Gitlab::Tracking.event(self.class.name, 'create', label: 'import_group_from_file') if valid_user_permissions? && import_file && valid_import_file? && restorers.all?(&:restore) + remove_import_file notify_success Gitlab::Tracking.event( @@ -47,7 +48,6 @@ def execute ensure remove_base_tmp_dir - remove_import_file end private diff --git a/app/services/import/import_file_cleanup_service.rb b/app/services/import/import_file_cleanup_service.rb new file mode 100644 index 0000000000000..4273029aaaa4d --- /dev/null +++ b/app/services/import/import_file_cleanup_service.rb @@ -0,0 +1,19 @@ +# frozen_string_literal: true + +module Import + class ImportFileCleanupService + LAST_MODIFIED = 72.hours + BATCH_SIZE = 100 + + def execute + ImportExportUpload + .with_import_file + .updated_before(LAST_MODIFIED.ago) + .each_batch(of: BATCH_SIZE) do |batch| + batch.each do |upload| + ::Gitlab::Import::RemoveImportFileWorker.perform_async(upload.id) + end + end + end + end +end diff --git a/app/workers/all_queues.yml b/app/workers/all_queues.yml index f98e31efe9b2f..37400d360b8c0 100644 --- a/app/workers/all_queues.yml +++ b/app/workers/all_queues.yml @@ -636,6 +636,16 @@ :idempotent: false :tags: [] :queue_namespace: :cronjob +- :name: cronjob:import_import_file_cleanup + :worker_name: Gitlab::Import::ImportFileCleanupWorker + :feature_category: :importers + :has_external_dependencies: false + :urgency: :low + :resource_boundary: :unknown + :weight: 1 + :idempotent: true + :tags: [] + :queue_namespace: :cronjob - :name: cronjob:import_stuck_project_import_jobs :worker_name: Gitlab::Import::StuckProjectImportJobsWorker :feature_category: :importers @@ -3820,6 +3830,16 @@ :idempotent: true :tags: [] :queue_namespace: +- :name: import_remove_import_file + :worker_name: Gitlab::Import::RemoveImportFileWorker + :feature_category: :importers + :has_external_dependencies: false + :urgency: :low + :resource_boundary: :unknown + :weight: 1 + :idempotent: true + :tags: [] + :queue_namespace: - :name: import_user_mapping_assignment_from_csv :worker_name: Import::UserMapping::AssignmentFromCsvWorker :feature_category: :importers diff --git a/app/workers/gitlab/import/import_file_cleanup_worker.rb b/app/workers/gitlab/import/import_file_cleanup_worker.rb new file mode 100644 index 0000000000000..9180df79b6042 --- /dev/null +++ b/app/workers/gitlab/import/import_file_cleanup_worker.rb @@ -0,0 +1,18 @@ +# frozen_string_literal: true + +module Gitlab + module Import + class ImportFileCleanupWorker + include ApplicationWorker + include CronjobQueue # rubocop:disable Scalability/CronWorkerContext -- no context in this worker + + idempotent! + feature_category :importers + data_consistency :sticky + + def perform + ::Import::ImportFileCleanupService.new.execute + end + end + end +end diff --git a/app/workers/gitlab/import/remove_import_file_worker.rb b/app/workers/gitlab/import/remove_import_file_worker.rb new file mode 100644 index 0000000000000..66a7414b186d4 --- /dev/null +++ b/app/workers/gitlab/import/remove_import_file_worker.rb @@ -0,0 +1,34 @@ +# frozen_string_literal: true + +module Gitlab + module Import + class RemoveImportFileWorker + include ApplicationWorker + + idempotent! + feature_category :importers + data_consistency :sticky + + def perform(upload_id) + upload = ImportExportUpload.find_by_id(upload_id) + + return unless upload + + upload.remove_import_file! + upload.save! + + logger.info( + message: 'Removed ImportExportUpload import_file', + project_id: upload.project_id, + group_id: upload.group_id + ) + end + + private + + def logger + @logger ||= ::Import::Framework::Logger.build + end + end + end +end diff --git a/app/workers/repository_import_worker.rb b/app/workers/repository_import_worker.rb index 2eabcd39f2312..fe3645fe24b2b 100644 --- a/app/workers/repository_import_worker.rb +++ b/app/workers/repository_import_worker.rb @@ -6,6 +6,7 @@ class RepositoryImportWorker # rubocop:disable Scalability/IdempotentWorker data_consistency :always include ExceptionBacktrace include ProjectStartImport + include Sidekiq::InterruptionsExhausted feature_category :importers worker_has_external_dependencies! @@ -14,6 +15,10 @@ class RepositoryImportWorker # rubocop:disable Scalability/IdempotentWorker sidekiq_options status_expiration: Gitlab::Import::StuckImportJob::IMPORT_JOBS_EXPIRATION worker_resource_boundary :memory + sidekiq_interruptions_exhausted do |job| + new.perform_failure(job['args'].first) + end + def perform(project_id) Gitlab::QueryLimiting.disable!('https://gitlab.com/gitlab-org/gitlab/-/issues/464677') @@ -37,6 +42,15 @@ def perform(project_id) end end + def perform_failure(project_id) + @project = Project.find_by_id(project_id) + import_export_upload = @project.import_export_uploads.find_by_user_id(project.creator.id) + + fail_import('Import process reached the maximum number of interruptions') + + ::Gitlab::Import::RemoveImportFileWorker.perform_async(import_export_upload.id) + end + private attr_reader :project diff --git a/config/initializers/1_settings.rb b/config/initializers/1_settings.rb index 8a02b72906abd..7df1eab32de65 100644 --- a/config/initializers/1_settings.rb +++ b/config/initializers/1_settings.rb @@ -536,6 +536,9 @@ Settings.cron_jobs['import_export_project_cleanup_worker'] ||= {} Settings.cron_jobs['import_export_project_cleanup_worker']['cron'] ||= '0 * * * *' Settings.cron_jobs['import_export_project_cleanup_worker']['job_class'] = 'ImportExportProjectCleanupWorker' +Settings.cron_jobs['gitlab_import_import_file_cleanup_worker'] ||= {} +Settings.cron_jobs['gitlab_import_import_file_cleanup_worker']['cron'] ||= '30 * * * *' +Settings.cron_jobs['gitlab_import_import_file_cleanup_worker']['job_class'] = 'Gitlab::Import::ImportFileCleanupWorker' Settings.cron_jobs['ci_archive_traces_cron_worker'] ||= {} Settings.cron_jobs['ci_archive_traces_cron_worker']['cron'] ||= '17 * * * *' Settings.cron_jobs['ci_archive_traces_cron_worker']['job_class'] = 'Ci::ArchiveTracesCronWorker' diff --git a/config/sidekiq_queues.yml b/config/sidekiq_queues.yml index 582d299ffc2ab..46b240129f405 100644 --- a/config/sidekiq_queues.yml +++ b/config/sidekiq_queues.yml @@ -471,6 +471,8 @@ - 1 - - import_refresh_import_jid - 1 +- - import_remove_import_file + - 1 - - import_user_mapping_assignment_from_csv - 1 - - incident_management diff --git a/db/migrate/20250227104609_add_partial_index_to_import_export_uploads_on_update_at_and_id.rb b/db/migrate/20250227104609_add_partial_index_to_import_export_uploads_on_update_at_and_id.rb new file mode 100644 index 0000000000000..da2861d678144 --- /dev/null +++ b/db/migrate/20250227104609_add_partial_index_to_import_export_uploads_on_update_at_and_id.rb @@ -0,0 +1,16 @@ +# frozen_string_literal: true + +class AddPartialIndexToImportExportUploadsOnUpdateAtAndId < Gitlab::Database::Migration[2.2] + milestone '17.10' + disable_ddl_transaction! + + INDEX_NAME = 'idx_import_export_uploads_updated_at_id_import_file' + + def up + add_concurrent_index :import_export_uploads, [:updated_at, :id], where: 'import_file IS NOT NULL', name: INDEX_NAME + end + + def down + remove_concurrent_index_by_name :import_export_uploads, INDEX_NAME + end +end diff --git a/db/schema_migrations/20250227104609 b/db/schema_migrations/20250227104609 new file mode 100644 index 0000000000000..f05e6f4c82dd8 --- /dev/null +++ b/db/schema_migrations/20250227104609 @@ -0,0 +1 @@ +ce4fb636326638e05acb2a7dc0109706faa85b117232e7dd5e9f32cf07240152 \ No newline at end of file diff --git a/db/structure.sql b/db/structure.sql index 9fb6101a663be..aed86583716c5 100644 --- a/db/structure.sql +++ b/db/structure.sql @@ -31058,6 +31058,8 @@ CREATE INDEX idx_hosted_runner_usage_on_project_billing_month ON ci_gitlab_hoste CREATE UNIQUE INDEX idx_hosted_runner_usage_unique ON ci_gitlab_hosted_runner_monthly_usages USING btree (runner_id, billing_month, root_namespace_id, project_id); +CREATE INDEX idx_import_export_uploads_updated_at_id_import_file ON import_export_uploads USING btree (updated_at, id) WHERE (import_file IS NOT NULL); + CREATE UNIQUE INDEX idx_import_placeholder_memberships_on_source_user_group_id ON import_placeholder_memberships USING btree (source_user_id, group_id); CREATE INDEX idx_import_placeholder_memberships_on_source_user_id_and_id ON import_placeholder_memberships USING btree (source_user_id, id); diff --git a/lib/gitlab/import_export/importer.rb b/lib/gitlab/import_export/importer.rb index de1ffdabf7b75..0fcdd1e0460ff 100644 --- a/lib/gitlab/import_export/importer.rb +++ b/lib/gitlab/import_export/importer.rb @@ -19,6 +19,8 @@ def initialize(project) def execute if import_file && check_version! && restorers.all?(&:restore) && overwrite_project + remove_import_file + project else raise Projects::ImportService::Error, shared.errors.to_sentence @@ -32,7 +34,6 @@ def execute raise Projects::ImportService::Error, e.message ensure remove_base_tmp_dir - remove_import_file end private diff --git a/spec/lib/gitlab/import_export/importer_spec.rb b/spec/lib/gitlab/import_export/importer_spec.rb index 71d8a316b1fc3..9d1932e92f5d5 100644 --- a/spec/lib/gitlab/import_export/importer_spec.rb +++ b/spec/lib/gitlab/import_export/importer_spec.rb @@ -2,7 +2,7 @@ require 'spec_helper' -RSpec.describe Gitlab::ImportExport::Importer do +RSpec.describe Gitlab::ImportExport::Importer, feature_category: :importers do let(:user) { create(:user) } let(:test_path) { "#{Dir.tmpdir}/importer_spec" } let(:shared) { project.import_export_shared } diff --git a/spec/models/import_export_upload_spec.rb b/spec/models/import_export_upload_spec.rb index 6ac7ef0611d3e..51704bed78b36 100644 --- a/spec/models/import_export_upload_spec.rb +++ b/spec/models/import_export_upload_spec.rb @@ -2,7 +2,7 @@ require 'spec_helper' -RSpec.describe ImportExportUpload do +RSpec.describe ImportExportUpload, feature_category: :importers do let(:project) { create(:project) } subject(:import_export_upload) { described_class.new(project: project) } @@ -35,7 +35,7 @@ describe 'scopes' do let_it_be(:upload1) { create(:import_export_upload, export_file: fixture_file_upload('spec/fixtures/project_export.tar.gz')) } - let_it_be(:upload2) { create(:import_export_upload, export_file: nil) } + let_it_be(:upload2) { create(:import_export_upload, export_file: nil, import_file: fixture_file_upload('spec/fixtures/project_export.tar.gz')) } let_it_be(:upload3) { create(:import_export_upload, export_file: fixture_file_upload('spec/fixtures/project_export.tar.gz'), updated_at: 25.hours.ago) } let_it_be(:upload4) { create(:import_export_upload, export_file: nil, updated_at: 2.days.ago) } @@ -45,6 +45,12 @@ end end + describe '.with_import_file' do + it 'returns uploads with import file' do + expect(described_class.with_import_file).to contain_exactly(upload2) + end + end + describe '.updated_before' do it 'returns uploads for a specified date' do expect(described_class.updated_before(24.hours.ago)).to contain_exactly(upload3, upload4) diff --git a/spec/services/import/import_file_cleanup_service_spec.rb b/spec/services/import/import_file_cleanup_service_spec.rb new file mode 100644 index 0000000000000..bc1939a449606 --- /dev/null +++ b/spec/services/import/import_file_cleanup_service_spec.rb @@ -0,0 +1,35 @@ +# frozen_string_literal: true + +require 'spec_helper' + +RSpec.describe Import::ImportFileCleanupService, feature_category: :importers do + subject(:service) { described_class.new } + + describe '#execute' do + it 'enqueues a removal job for old import_file' do + upload = create( + :import_export_upload, + updated_at: (described_class::LAST_MODIFIED + 1.hour).ago, + import_file: fixture_file_upload('spec/fixtures/project_export.tar.gz') + ) + + expect(::Gitlab::Import::RemoveImportFileWorker).to receive(:perform_async).with(upload.id) + + service.execute + end + + context 'when import_file is new' do + it 'does not enqueue removal job' do + create( + :import_export_upload, + updated_at: (described_class::LAST_MODIFIED - 1.hour).ago, + import_file: fixture_file_upload('spec/fixtures/project_export.tar.gz') + ) + + expect(::Gitlab::Import::RemoveImportFileWorker).not_to receive(:perform_async) + + service.execute + end + end + end +end diff --git a/spec/workers/gitlab/import/import_file_cleanup_worker_spec.rb b/spec/workers/gitlab/import/import_file_cleanup_worker_spec.rb new file mode 100644 index 0000000000000..72fe32a53d156 --- /dev/null +++ b/spec/workers/gitlab/import/import_file_cleanup_worker_spec.rb @@ -0,0 +1,19 @@ +# frozen_string_literal: true + +require 'spec_helper' + +RSpec.describe Gitlab::Import::ImportFileCleanupWorker, feature_category: :importers do + subject(:worker) { described_class.new } + + describe '#perform' do + it_behaves_like 'an idempotent worker' + + it 'executes Import::ImportFileCleanupService' do + expect_next_instance_of(Import::ImportFileCleanupService) do |service| + expect(service).to receive(:execute) + end + + worker.perform + end + end +end diff --git a/spec/workers/gitlab/import/remove_import_file_worker_spec.rb b/spec/workers/gitlab/import/remove_import_file_worker_spec.rb new file mode 100644 index 0000000000000..b7ca4e4c74a86 --- /dev/null +++ b/spec/workers/gitlab/import/remove_import_file_worker_spec.rb @@ -0,0 +1,52 @@ +# frozen_string_literal: true + +require 'spec_helper' + +RSpec.describe Gitlab::Import::RemoveImportFileWorker, feature_category: :importers do + let(:upload) do + create( + :import_export_upload, + updated_at: 4.days.ago, + import_file: fixture_file_upload('spec/fixtures/project_export.tar.gz') + ) + end + + subject(:worker) { described_class.new } + + describe '#perform' do + before do + allow_next_instance_of(::Import::Framework::Logger) do |logger| + allow(logger).to receive(:info) + end + end + + it_behaves_like 'an idempotent worker' do + let(:job_args) { [upload.id] } + end + + it 'removes import_file of the upload and logs' do + expect_next_instance_of(::Import::Framework::Logger) do |logger| + expect(logger) + .to receive(:info) + .with( + message: 'Removed ImportExportUpload import_file', + project_id: upload.project_id, + group_id: upload.group_id + ) + end + + expect { worker.perform(upload.id) }.to change { upload.reload.import_file.file.nil? }.to(true) + end + + context 'when upload cannot be found' do + it 'returns' do + expect(ImportExportUpload).to receive(:find_by_id).with(upload.id).and_return(nil) + allow(upload).to receive(:remove_import_file!) + + worker.perform(upload.id) + + expect(upload).not_to have_received(:remove_import_file!) + end + end + end +end diff --git a/spec/workers/repository_import_worker_spec.rb b/spec/workers/repository_import_worker_spec.rb index 44a0a0e0cf0f2..3eff7f20ffd97 100644 --- a/spec/workers/repository_import_worker_spec.rb +++ b/spec/workers/repository_import_worker_spec.rb @@ -3,18 +3,18 @@ require 'spec_helper' RSpec.describe RepositoryImportWorker, feature_category: :importers do - describe '#perform' do - let(:project) { build_stubbed(:project, :import_scheduled, import_state: import_state, import_url: 'url') } - let(:import_state) { create(:import_state, status: :scheduled) } - let(:jid) { '12345678' } - - before do - allow(subject).to receive(:jid).and_return(jid) - allow(Project).to receive(:find_by_id).with(project.id).and_return(project) - allow(project).to receive(:after_import) - allow(import_state).to receive(:start).and_return(true) - end + let(:project) { build_stubbed(:project, :import_scheduled, import_state: import_state, import_url: 'url') } + let(:import_state) { create(:import_state, status: :scheduled) } + let(:jid) { '12345678' } + + before do + allow(subject).to receive(:jid).and_return(jid) + allow(Project).to receive(:find_by_id).with(project.id).and_return(project) + allow(project).to receive(:after_import) + allow(import_state).to receive(:start).and_return(true) + end + describe '#perform' do context 'when project not found (deleted)' do before do allow(Project).to receive(:find_by_id).with(project.id).and_return(nil) @@ -88,4 +88,22 @@ end end end + + describe '.sidekiq_interruptions_exhausted' do + it 'sets import status to failed and removes import_file' do + user = build_stubbed(:user) + upload = build_stubbed(:import_export_upload, project: project, user: user) + job = { 'args' => [project.id] } + + allow(import_state).to receive(:mark_as_failed) + allow(project).to receive_message_chain(:import_export_uploads, :find_by_user_id).and_return(upload) + expect(::Gitlab::Import::RemoveImportFileWorker).to receive(:perform_async).with(upload.id) + + described_class.interruptions_exhausted_block.call(job) + + expect(import_state) + .to have_received(:mark_as_failed) + .with('Import process reached the maximum number of interruptions') + end + end end -- GitLab