From 2fda52e5b7c0f7ee4311b80cf7b3d2bcfab8336f Mon Sep 17 00:00:00 2001 From: Marius Bobin <mbobin@gitlab.com> Date: Wed, 18 Dec 2024 09:11:59 +0000 Subject: [PATCH] Reclaim disk space used by old job tokens Changelog: other --- .../remove_old_job_tokens.yml | 8 ++ ...41217154001_queue_remove_old_job_tokens.rb | 26 ++++++ db/schema_migrations/20241217154001 | 1 + .../remove_old_job_tokens.rb | 28 +++++++ .../remove_old_job_tokens_spec.rb | 84 +++++++++++++++++++ ...154001_queue_remove_old_job_tokens_spec.rb | 27 ++++++ 6 files changed, 174 insertions(+) create mode 100644 db/docs/batched_background_migrations/remove_old_job_tokens.yml create mode 100644 db/post_migrate/20241217154001_queue_remove_old_job_tokens.rb create mode 100644 db/schema_migrations/20241217154001 create mode 100644 lib/gitlab/background_migration/remove_old_job_tokens.rb create mode 100644 spec/lib/gitlab/background_migration/remove_old_job_tokens_spec.rb create mode 100644 spec/migrations/20241217154001_queue_remove_old_job_tokens_spec.rb diff --git a/db/docs/batched_background_migrations/remove_old_job_tokens.yml b/db/docs/batched_background_migrations/remove_old_job_tokens.yml new file mode 100644 index 0000000000000..868b36d2df1a2 --- /dev/null +++ b/db/docs/batched_background_migrations/remove_old_job_tokens.yml @@ -0,0 +1,8 @@ +--- +migration_job_name: RemoveOldJobTokens +description: Reclaim disk space taken by old job tokens +feature_category: continuous_integration +introduced_by_url: https://gitlab.com/gitlab-org/gitlab/-/merge_requests/176025 +milestone: '17.8' +queued_migration_version: 20241217154001 +finalized_by: # version of the migration that finalized this BBM diff --git a/db/post_migrate/20241217154001_queue_remove_old_job_tokens.rb b/db/post_migrate/20241217154001_queue_remove_old_job_tokens.rb new file mode 100644 index 0000000000000..f74409f1a1214 --- /dev/null +++ b/db/post_migrate/20241217154001_queue_remove_old_job_tokens.rb @@ -0,0 +1,26 @@ +# frozen_string_literal: true + +class QueueRemoveOldJobTokens < Gitlab::Database::Migration[2.2] + milestone '17.8' + restrict_gitlab_migration gitlab_schema: :gitlab_ci + + MIGRATION = "RemoveOldJobTokens" + DELAY_INTERVAL = 2.minutes + BATCH_SIZE = 1000 + SUB_BATCH_SIZE = 100 + + def up + queue_batched_background_migration( + MIGRATION, + :p_ci_builds, + :id, + job_interval: DELAY_INTERVAL, + batch_size: BATCH_SIZE, + sub_batch_size: SUB_BATCH_SIZE + ) + end + + def down + delete_batched_background_migration(MIGRATION, :p_ci_builds, :id, []) + end +end diff --git a/db/schema_migrations/20241217154001 b/db/schema_migrations/20241217154001 new file mode 100644 index 0000000000000..63f575b6952e8 --- /dev/null +++ b/db/schema_migrations/20241217154001 @@ -0,0 +1 @@ +6efa41f564e0a16989e98d28b9d31c614142b5b95b1978b14d497275ca71a3ba \ No newline at end of file diff --git a/lib/gitlab/background_migration/remove_old_job_tokens.rb b/lib/gitlab/background_migration/remove_old_job_tokens.rb new file mode 100644 index 0000000000000..412488dda3161 --- /dev/null +++ b/lib/gitlab/background_migration/remove_old_job_tokens.rb @@ -0,0 +1,28 @@ +# frozen_string_literal: true + +module Gitlab + module BackgroundMigration + class RemoveOldJobTokens < BatchedMigrationJob + operation_name :remove_old_tokens + feature_category :continuous_integration + + ACTIVE_STATUSES = %w[ + waiting_for_resource + preparing + waiting_for_callback + pending + running + ].freeze + + def perform + each_sub_batch do |sub_batch| + sub_batch + .where.not(status: ACTIVE_STATUSES) + .where.not(token_encrypted: nil) + .where(created_at: ...1.month.ago) + .update_all(token_encrypted: nil) + end + end + end + end +end diff --git a/spec/lib/gitlab/background_migration/remove_old_job_tokens_spec.rb b/spec/lib/gitlab/background_migration/remove_old_job_tokens_spec.rb new file mode 100644 index 0000000000000..91617d4a30af5 --- /dev/null +++ b/spec/lib/gitlab/background_migration/remove_old_job_tokens_spec.rb @@ -0,0 +1,84 @@ +# frozen_string_literal: true + +require 'spec_helper' + +RSpec.describe Gitlab::BackgroundMigration::RemoveOldJobTokens, + feature_category: :continuous_integration, migration: :gitlab_ci do + let(:connection) { Ci::ApplicationRecord.connection } + let(:pipelines_table) { table(:p_ci_pipelines, database: :ci, primary_key: :id) } + let(:builds_table) { table(:p_ci_builds, database: :ci, primary_key: :id) } + + let(:default_attributes) { { project_id: 500, partition_id: 100 } } + let!(:old_pipeline) { pipelines_table.create!(default_attributes) } + let!(:new_pipeline) { pipelines_table.create!(default_attributes) } + + let(:all_statuses) do + %w[created + waiting_for_resource + preparing + waiting_for_callback + pending + running + success + failed + canceling + canceled + skipped + manual + scheduled] + end + + let(:active_statuses) { described_class::ACTIVE_STATUSES } + let(:inactive_statuses) { all_statuses - active_statuses } + + before do + insert_jobs(pipeline: old_pipeline, created_at: 2.months.ago) + insert_jobs(pipeline: new_pipeline, created_at: 1.week.ago) + end + + describe '#perform' do + subject(:migration) do + described_class.new( + start_id: builds_table.minimum(:id), + end_id: builds_table.maximum(:id), + batch_table: :p_ci_builds, + batch_column: :id, + sub_batch_size: 100, + pause_ms: 0, + connection: connection + ) + end + + it 'nullifies tokens for old not running jobs', :aggregate_failures do + expect(builds_table.where(token_encrypted: nil).any?).to be_falsey + + expect { migration.perform }.to not_change { builds_table.count } + + expect(builds_for(new_pipeline).where.not(token_encrypted: nil).count) + .to eq(all_statuses.count) + + expect(builds_for(old_pipeline, status: active_statuses, token_encrypted: nil)) + .to be_empty + + expect(builds_for(old_pipeline, status: inactive_statuses, token_encrypted: nil).count) + .to eq(inactive_statuses.count) + end + end + + def insert_jobs(pipeline:, created_at:) + data = all_statuses.map do |status| + default_attributes.merge( + status: status, + token_encrypted: SecureRandom.hex, + commit_id: pipeline.id, + created_at: created_at + ) + end + + builds_table.insert_all(data, unique_by: [:id, :partition_id]) + end + + def builds_for(pipeline, attrs = {}) + builds_table.where(commit_id: pipeline, **attrs) + end +end diff --git a/spec/migrations/20241217154001_queue_remove_old_job_tokens_spec.rb b/spec/migrations/20241217154001_queue_remove_old_job_tokens_spec.rb new file mode 100644 index 0000000000000..8debf77a0cbff --- /dev/null +++ b/spec/migrations/20241217154001_queue_remove_old_job_tokens_spec.rb @@ -0,0 +1,27 @@ +# frozen_string_literal: true + +require 'spec_helper' +require_migration! + +RSpec.describe QueueRemoveOldJobTokens, migration: :gitlab_ci, feature_category: :continuous_integration do + let!(:batched_migration) { described_class::MIGRATION } + + it 'schedules a new batched migration' do + reversible_migration do |migration| + migration.before -> { + expect(batched_migration).not_to have_scheduled_batched_migration + } + + migration.after -> { + expect(batched_migration).to have_scheduled_batched_migration( + gitlab_schema: :gitlab_ci, + table_name: :p_ci_builds, + column_name: :id, + interval: described_class::DELAY_INTERVAL, + batch_size: described_class::BATCH_SIZE, + sub_batch_size: described_class::SUB_BATCH_SIZE + ) + } + end + end +end -- GitLab