From ad5b117bc10d7515d0544fc5837802f1f3b5787b Mon Sep 17 00:00:00 2001 From: Sylvester Chin <schin@gitlab.com> Date: Tue, 18 Jul 2023 12:16:33 +0000 Subject: [PATCH] Add background migration worker for Redis Changelog: added --- .rubocop_todo/gitlab/namespaced_class.yml | 1 + app/workers/all_queues.yml | 9 +++ app/workers/redis_migration_worker.rb | 40 ++++++++++ config/sidekiq_queues.yml | 2 + ...031_cleanup_project_pipeline_status_key.rb | 15 ++++ db/schema_migrations/20230703024031 | 1 + doc/development/redis.md | 45 ++++++++++++ .../backfill_project_pipeline_status_ttl.rb | 33 +++++++++ lib/gitlab/database/migration_helpers.rb | 1 + .../database/migrations/redis_helpers.rb | 17 +++++ .../size_limiter/validator.rb | 3 +- ...ckfill_project_pipeline_status_ttl_spec.rb | 37 ++++++++++ .../database/migrations/redis_helpers_spec.rb | 33 +++++++++ ...leanup_project_pipeline_status_key_spec.rb | 12 +++ spec/workers/redis_migration_worker_spec.rb | 73 +++++++++++++++++++ 15 files changed, 321 insertions(+), 1 deletion(-) create mode 100644 app/workers/redis_migration_worker.rb create mode 100644 db/post_migrate/20230703024031_cleanup_project_pipeline_status_key.rb create mode 100644 db/schema_migrations/20230703024031 create mode 100644 lib/gitlab/background_migration/redis/backfill_project_pipeline_status_ttl.rb create mode 100644 lib/gitlab/database/migrations/redis_helpers.rb create mode 100644 spec/lib/gitlab/background_migration/redis/backfill_project_pipeline_status_ttl_spec.rb create mode 100644 spec/lib/gitlab/database/migrations/redis_helpers_spec.rb create mode 100644 spec/migrations/20230703024031_cleanup_project_pipeline_status_key_spec.rb create mode 100644 spec/workers/redis_migration_worker_spec.rb diff --git a/.rubocop_todo/gitlab/namespaced_class.yml b/.rubocop_todo/gitlab/namespaced_class.yml index 4ab42f017900c..8c8306124c1ca 100644 --- a/.rubocop_todo/gitlab/namespaced_class.yml +++ b/.rubocop_todo/gitlab/namespaced_class.yml @@ -808,6 +808,7 @@ Gitlab/NamespacedClass: - 'app/workers/purge_dependency_proxy_cache_worker.rb' - 'app/workers/reactive_caching_worker.rb' - 'app/workers/rebase_worker.rb' + - 'app/workers/redis_migration_worker.rb' - 'app/workers/remote_mirror_notification_worker.rb' - 'app/workers/remove_expired_group_links_worker.rb' - 'app/workers/remove_expired_members_worker.rb' diff --git a/app/workers/all_queues.yml b/app/workers/all_queues.yml index 9e341fb92633c..6f6fd9ddb6586 100644 --- a/app/workers/all_queues.yml +++ b/app/workers/all_queues.yml @@ -3486,6 +3486,15 @@ :weight: 2 :idempotent: false :tags: [] +- :name: redis_migration + :worker_name: RedisMigrationWorker + :feature_category: :redis + :has_external_dependencies: false + :urgency: :throttled + :resource_boundary: :unknown + :weight: 1 + :idempotent: true + :tags: [] - :name: releases_create_evidence :worker_name: Releases::CreateEvidenceWorker :feature_category: :release_evidence diff --git a/app/workers/redis_migration_worker.rb b/app/workers/redis_migration_worker.rb new file mode 100644 index 0000000000000..bad9baeac70de --- /dev/null +++ b/app/workers/redis_migration_worker.rb @@ -0,0 +1,40 @@ +# frozen_string_literal: true + +class RedisMigrationWorker + include ApplicationWorker + + idempotent! + data_consistency :delayed + feature_category :redis + urgency :throttled + loggable_arguments 0 + + SCAN_START_STOP = '0' + + def perform(job_class_name, cursor, options = {}) + migrator = self.class.fetch_migrator!(job_class_name) + + scan_size = options[:scan_size] || 1000 + deadline = Time.now.utc + 3.minutes + + while Time.now.utc < deadline + cursor, keys = migrator.redis.scan(cursor, match: migrator.scan_match_pattern, count: scan_size) + + migrator.perform(keys) if keys.any? + + sleep(0.01) + break if cursor == SCAN_START_STOP + end + + self.class.perform_async(job_class_name, cursor, options) unless cursor == SCAN_START_STOP + end + + class << self + def fetch_migrator!(job_class_name) + job_class = "Gitlab::BackgroundMigration::Redis::#{job_class_name}".safe_constantize + raise NotImplementedError, "#{job_class_name} does not exist" if job_class.nil? + + job_class.new + end + end +end diff --git a/config/sidekiq_queues.yml b/config/sidekiq_queues.yml index 48805bc7e7819..7f05b8af7bf06 100644 --- a/config/sidekiq_queues.yml +++ b/config/sidekiq_queues.yml @@ -521,6 +521,8 @@ - 1 - - rebase - 2 +- - redis_migration + - 1 - - refresh_license_compliance_checks - 2 - - releases_create_evidence diff --git a/db/post_migrate/20230703024031_cleanup_project_pipeline_status_key.rb b/db/post_migrate/20230703024031_cleanup_project_pipeline_status_key.rb new file mode 100644 index 0000000000000..cb7fc04166fb2 --- /dev/null +++ b/db/post_migrate/20230703024031_cleanup_project_pipeline_status_key.rb @@ -0,0 +1,15 @@ +# frozen_string_literal: true + +class CleanupProjectPipelineStatusKey < Gitlab::Database::Migration[2.1] + disable_ddl_transaction! + + MIGRATION_WORKER_CLASS = 'BackfillProjectPipelineStatusTtl' + + def up + queue_redis_migration_job(MIGRATION_WORKER_CLASS) + end + + def down + # no-op + end +end diff --git a/db/schema_migrations/20230703024031 b/db/schema_migrations/20230703024031 new file mode 100644 index 0000000000000..4e68e593e67d4 --- /dev/null +++ b/db/schema_migrations/20230703024031 @@ -0,0 +1 @@ +bfbb862d6d7c54ebfa110a6266c99b9c264f4ae2d4c3b9cf07d47beb642bbb2b \ No newline at end of file diff --git a/doc/development/redis.md b/doc/development/redis.md index 5073d9350e8c7..ebc7c0271a121 100644 --- a/doc/development/redis.md +++ b/doc/development/redis.md @@ -285,3 +285,48 @@ This is used by the [`RepositorySetCache`](https://gitlab.com/gitlab-org/gitlab/-/blob/master/lib/gitlab/repository_set_cache.rb) to provide a convenient way to use sets to cache repository data like branch names. + +## Background migration + +Redis-based migrations involve using the `SCAN` command to scan the entire Redis instance for certain key patterns. +For large Redis instances, the migration might [exceed the time limit](migration_style_guide.md#how-long-a-migration-should-take) +for regular or post-deployment migrations. [`RedisMigrationWorker`](https://gitlab.com/gitlab-org/gitlab/-/blob/master/app/workers/redis_migration_worker.rb) +performs long-running Redis migrations as a background migration. + +To perform a background migration by creating a class: + +```ruby +module Gitlab + module BackgroundMigration + module Redis + class BackfillCertainKey + def perform(keys) + # implement logic to clean up or backfill keys + end + + def scan_match_pattern + # define the match pattern for the `SCAN` command + end + + def redis + # define the exact Redis instance + end + end + end + end +end +``` + +To trigger the worker through a post-deployment migration: + +```ruby +class ExampleBackfill < Gitlab::Database::Migration[2.1] + disable_ddl_transaction! + + MIGRATION='BackfillCertainKey' + + def up + queue_redis_migration_job(MIGRATION) + end +end +``` diff --git a/lib/gitlab/background_migration/redis/backfill_project_pipeline_status_ttl.rb b/lib/gitlab/background_migration/redis/backfill_project_pipeline_status_ttl.rb new file mode 100644 index 0000000000000..2672498b6271f --- /dev/null +++ b/lib/gitlab/background_migration/redis/backfill_project_pipeline_status_ttl.rb @@ -0,0 +1,33 @@ +# frozen_string_literal: true + +module Gitlab + module BackgroundMigration + module Redis + # BackfillProjectPipelineStatusTtl cleans up keys written by + # Gitlab::Cache::Ci::ProjectPipelineStatus by adding a minimum 8-hour ttl + # to all keys. This either sets or extends the ttl of matching keys. + # + class BackfillProjectPipelineStatusTtl # rubocop:disable Migration/BackgroundMigrationBaseClass + def perform(keys) + # spread out deletes over a 4 hour period starting in 8 hours time + ttl_duration = 10.hours.to_i + ttl_jitter = 2.hours.to_i + + Gitlab::Instrumentation::RedisClusterValidator.allow_cross_slot_commands do + Gitlab::Redis::CrossSlot::Pipeline.new(redis).pipelined do |pipeline| + keys.each { |key| pipeline.expire(key, ttl_duration + rand(-ttl_jitter..ttl_jitter)) } + end + end + end + + def scan_match_pattern + "#{Gitlab::Redis::Cache::CACHE_NAMESPACE}:project:*:pipeline_status" + end + + def redis + @redis ||= ::Redis.new(Gitlab::Redis::Cache.params) + end + end + end + end +end diff --git a/lib/gitlab/database/migration_helpers.rb b/lib/gitlab/database/migration_helpers.rb index 291f483e6e42d..256c524e989d5 100644 --- a/lib/gitlab/database/migration_helpers.rb +++ b/lib/gitlab/database/migration_helpers.rb @@ -11,6 +11,7 @@ module MigrationHelpers include Migrations::ConstraintsHelpers include Migrations::ExtensionHelpers include Migrations::SidekiqHelpers + include Migrations::RedisHelpers include DynamicModelHelpers include RenameTableHelpers include AsyncIndexes::MigrationHelpers diff --git a/lib/gitlab/database/migrations/redis_helpers.rb b/lib/gitlab/database/migrations/redis_helpers.rb new file mode 100644 index 0000000000000..41a2841da7ce0 --- /dev/null +++ b/lib/gitlab/database/migrations/redis_helpers.rb @@ -0,0 +1,17 @@ +# frozen_string_literal: true + +module Gitlab + module Database + module Migrations + module RedisHelpers + SCAN_START_CURSOR = '0' + + # Check if the migration exists before enqueueing the worker + def queue_redis_migration_job(job_name) + RedisMigrationWorker.fetch_migrator!(job_name) + RedisMigrationWorker.perform_async(job_name, SCAN_START_CURSOR) + end + end + end + end +end diff --git a/lib/gitlab/sidekiq_middleware/size_limiter/validator.rb b/lib/gitlab/sidekiq_middleware/size_limiter/validator.rb index acc3e1712abc3..b19cc994d321a 100644 --- a/lib/gitlab/sidekiq_middleware/size_limiter/validator.rb +++ b/lib/gitlab/sidekiq_middleware/size_limiter/validator.rb @@ -33,7 +33,8 @@ class Validator EXEMPT_WORKER_NAMES = %w[BackgroundMigrationWorker BackgroundMigration::CiDatabaseWorker Database::BatchedBackgroundMigrationWorker - Database::BatchedBackgroundMigration::CiDatabaseWorker].to_set + Database::BatchedBackgroundMigration::CiDatabaseWorker + RedisMigrationWorker].to_set JOB_STATUS_KEY = 'size_limiter' diff --git a/spec/lib/gitlab/background_migration/redis/backfill_project_pipeline_status_ttl_spec.rb b/spec/lib/gitlab/background_migration/redis/backfill_project_pipeline_status_ttl_spec.rb new file mode 100644 index 0000000000000..e3b1b67cb4080 --- /dev/null +++ b/spec/lib/gitlab/background_migration/redis/backfill_project_pipeline_status_ttl_spec.rb @@ -0,0 +1,37 @@ +# frozen_string_literal: true + +require 'spec_helper' + +RSpec.describe Gitlab::BackgroundMigration::Redis::BackfillProjectPipelineStatusTtl, + :clean_gitlab_redis_cache, feature_category: :redis do + let(:redis) { ::Redis.new(::Gitlab::Redis::Cache.params) } + let(:keys) { ["cache:gitlab:project:1:pipeline_status", "cache:gitlab:project:2:pipeline_status"] } + let(:invalid_keys) { ["cache:gitlab:project:pipeline_status:1", "cache:gitlab:project:pipeline_status:2"] } + + subject { described_class.new } + + before do + (keys + invalid_keys).each { |key| redis.set(key, 1) } + end + + describe '#perform' do + it 'sets a ttl on given keys' do + subject.perform(keys) + + keys.each do |k| + expect(redis.ttl(k)).to be > 0 + end + end + end + + describe '#scan_match_pattern' do + it "finds all the required keys only" do + expect(redis.scan('0').second).to match_array(keys + invalid_keys) + expect(subject.redis.scan_each(match: subject.scan_match_pattern).to_a).to contain_exactly(*keys) + end + end + + describe '#redis' do + it { expect(subject.redis.inspect).to eq(redis.inspect) } + end +end diff --git a/spec/lib/gitlab/database/migrations/redis_helpers_spec.rb b/spec/lib/gitlab/database/migrations/redis_helpers_spec.rb new file mode 100644 index 0000000000000..5f3a48289e210 --- /dev/null +++ b/spec/lib/gitlab/database/migrations/redis_helpers_spec.rb @@ -0,0 +1,33 @@ +# frozen_string_literal: true + +require "spec_helper" + +RSpec.describe Gitlab::Database::Migrations::RedisHelpers, feature_category: :redis do + let(:migration) do + ActiveRecord::Migration.new.extend(described_class) + end + + describe "#queue_redis_migration_job" do + let(:job_name) { 'SampleJob' } + + subject { migration.queue_redis_migration_job(job_name) } + + context 'when migrator does not exist' do + it 'raises error and fails the migration' do + expect { subject }.to raise_error(NotImplementedError) + end + end + + context 'when migrator exists' do + before do + allow(RedisMigrationWorker).to receive(:fetch_migrator!) + end + + it 'checks migrator and enqueues job' do + expect(RedisMigrationWorker).to receive(:perform_async).with(job_name, '0') + + subject + end + end + end +end diff --git a/spec/migrations/20230703024031_cleanup_project_pipeline_status_key_spec.rb b/spec/migrations/20230703024031_cleanup_project_pipeline_status_key_spec.rb new file mode 100644 index 0000000000000..4232162134a80 --- /dev/null +++ b/spec/migrations/20230703024031_cleanup_project_pipeline_status_key_spec.rb @@ -0,0 +1,12 @@ +# frozen_string_literal: true + +require 'spec_helper' +require_migration! + +RSpec.describe CleanupProjectPipelineStatusKey, feature_category: :redis do + it 'enqueues a RedisMigrationWorker job from cursor 0' do + expect(RedisMigrationWorker).to receive(:perform_async).with('BackfillProjectPipelineStatusTtl', '0') + + migrate! + end +end diff --git a/spec/workers/redis_migration_worker_spec.rb b/spec/workers/redis_migration_worker_spec.rb new file mode 100644 index 0000000000000..ad0186e929d2b --- /dev/null +++ b/spec/workers/redis_migration_worker_spec.rb @@ -0,0 +1,73 @@ +# frozen_string_literal: true + +require 'spec_helper' + +RSpec.describe RedisMigrationWorker, :clean_gitlab_redis_shared_state, feature_category: :redis do + describe '.fetch_migrator!' do + it 'raise error if class does not exist' do + expect { described_class.fetch_migrator!('UnknownClass') }.to raise_error(NotImplementedError) + end + + context 'when class exists' do + it 'returns an instance' do + expect( + described_class.fetch_migrator!('BackfillProjectPipelineStatusTtl') + ).to be_a Gitlab::BackgroundMigration::Redis::BackfillProjectPipelineStatusTtl + end + end + end + + describe '#perform' do + let(:job_class_name) { 'SampleJob' } + let(:migrator_class) do + Class.new do + def perform(keys) + keys.each { |key| redis.set(key, "adjusted", ex: 10) } + end + + def scan_match_pattern + 'sample:*:pattern' + end + + def redis + ::Redis.new(::Gitlab::Redis::Cache.params) + end + end + end + + let(:migrator) { migrator_class.new } + + before do + allow(described_class).to receive(:fetch_migrator!).with(job_class_name).and_return(migrator) + + 100.times do |i| + migrator.redis.set("sample:#{i}:pattern", i) + end + end + + it 'runs migration logic on scanned keys' do + expect(migrator).to receive(:perform) + + subject.perform(job_class_name, '0') + end + + context 'when job exceeds deadline' do + before do + # stub Time.now to force the 3rd invocation to timeout + now = Time.now # rubocop:disable Rails/TimeZone + allow(Time).to receive(:now).and_return(now, now, now + 5.minutes) + end + + it 'enqueues another job and returns' do + expect(described_class).to receive(:perform_async) + + # use smaller scan_size to ensure multiple scans are required + subject.perform(job_class_name, '0', { scan_size: 10 }) + end + end + + it_behaves_like 'an idempotent worker' do + let(:job_args) { [job_class_name, '0'] } + end + end +end -- GitLab