From 64cd00b6fa19b93f420b8153af81b73543f4db7d Mon Sep 17 00:00:00 2001 From: Lukas Wanko <lwanko@gitlab.com> Date: Fri, 15 Nov 2024 23:49:05 +0000 Subject: [PATCH] Allow Sidekiq workers to be disabled by default by configuration Till now, we enabled all workers when a node was connected as a geo node; we now only enable the needed geo workers. With that, we can in the future disable the offline cloud license provision worker for SaaS. This features is behind a feature flag disable_bulk_sidekiq_job_activation EE: true Changelog: added --- config/initializers/sidekiq.rb | 15 +--- .../stop_bulk_sidekiq_job_activation.yml | 9 ++ ee/lib/gitlab/geo/cron_manager.rb | 80 ++++++++++++++---- ee/lib/gitlab/geo/geo_tasks.rb | 10 +++ ee/spec/lib/gitlab/geo/cron_manager_spec.rb | 84 +++++++++++++++---- ee/spec/lib/gitlab/geo/geo_tasks_spec.rb | 55 ++++++++++-- ee/spec/tasks/geo_rake_spec.rb | 11 ++- .../sidekiq_config/cron_job_initializer.rb | 30 +++++++ .../cron_job_initializer_spec.rb} | 33 ++++++-- 9 files changed, 262 insertions(+), 65 deletions(-) create mode 100644 ee/config/feature_flags/experiment/stop_bulk_sidekiq_job_activation.yml create mode 100644 lib/gitlab/sidekiq_config/cron_job_initializer.rb rename spec/{initializers/sidekiq_spec.rb => lib/gitlab/sidekiq_config/cron_job_initializer_spec.rb} (65%) diff --git a/config/initializers/sidekiq.rb b/config/initializers/sidekiq.rb index 66585ffe29bf2..81397aa9538b7 100644 --- a/config/initializers/sidekiq.rb +++ b/config/initializers/sidekiq.rb @@ -5,18 +5,6 @@ def self.enabled? end end -def load_cron_jobs! - # Set source to schedule to clear any missing jobs - # See https://github.com/sidekiq-cron/sidekiq-cron/pull/431 - Sidekiq::Cron::Job.load_from_hash! Gitlab::SidekiqConfig.cron_jobs, source: 'schedule' - - Gitlab.ee do - Gitlab::Mirror.configure_cron_job! - - Gitlab::Geo.configure_cron_jobs! - end -end - # initialise migrated_shards on start-up to catch any malformed SIDEKIQ_MIGRATED_SHARD lists. Gitlab::SidekiqSharding::Router.migrated_shards @@ -119,7 +107,8 @@ def load_cron_jobs! config[:cron_poll_interval] = Gitlab.config.cron_jobs.poll_interval config[:cron_poll_interval] = 0 if queue_instance != Gitlab::Redis::Queues::SIDEKIQ_MAIN_SHARD_INSTANCE_NAME - load_cron_jobs! + + Gitlab::SidekiqConfig::CronJobInitializer.execute # Avoid autoload issue such as 'Mail::Parsers::AddressStruct' # https://github.com/mikel/mail/issues/912#issuecomment-214850355 diff --git a/ee/config/feature_flags/experiment/stop_bulk_sidekiq_job_activation.yml b/ee/config/feature_flags/experiment/stop_bulk_sidekiq_job_activation.yml new file mode 100644 index 0000000000000..a9ff51fd87ff7 --- /dev/null +++ b/ee/config/feature_flags/experiment/stop_bulk_sidekiq_job_activation.yml @@ -0,0 +1,9 @@ +--- +name: stop_bulk_sidekiq_job_activation +feature_issue_url: https://gitlab.com/gitlab-org/gitlab/-/issues/488887 +introduced_by_url: https://gitlab.com/gitlab-org/gitlab/-/merge_requests/166652 +rollout_issue_url: https://gitlab.com/gitlab-org/gitlab/-/issues/502865 +milestone: '17.6' +group: group::provision +type: experiment +default_enabled: false diff --git a/ee/lib/gitlab/geo/cron_manager.rb b/ee/lib/gitlab/geo/cron_manager.rb index 649c657795188..753514cbba0c0 100644 --- a/ee/lib/gitlab/geo/cron_manager.rb +++ b/ee/lib/gitlab/geo/cron_manager.rb @@ -3,6 +3,11 @@ module Gitlab module Geo class CronManager + # This class helps to react to state changes in node types. We currently have non-geo nodes, primary and + # secondary geo nodes. This manager gets for example executed during Rails initialization, a promotion from a + # secondary to a primary node, during and also every minute by the `Geo::SidekiqCronConfigWorker` to ensure + # the correct status of the geo jobs. + COMMON_GEO_JOBS = %w[ geo_metrics_update_worker geo_verification_cron_worker @@ -25,11 +30,12 @@ class CronManager geo_sync_timeout_cron_worker ].freeze - GEO_JOBS = (COMMON_GEO_JOBS + PRIMARY_GEO_JOBS + SECONDARY_GEO_JOBS).freeze - CONFIG_WATCHER = 'geo_sidekiq_cron_config_worker' CONFIG_WATCHER_CLASS = 'Geo::SidekiqCronConfigWorker' + GEO_JOBS = (COMMON_GEO_JOBS + PRIMARY_GEO_JOBS + SECONDARY_GEO_JOBS).freeze + GEO_ALWAYS_ENABLED_JOBS = (COMMON_GEO_JOBS + COMMON_GEO_AND_NON_GEO_JOBS).freeze + def execute return unless Geo.connected? @@ -38,8 +44,7 @@ def execute elsif Geo.secondary? configure_secondary else - disable!(jobs(GEO_JOBS)) - enable!(all_jobs(except: GEO_JOBS)) + configure_non_geo_site end end @@ -56,36 +61,75 @@ def create_watcher! end end + # This method ensures disabled secondary geo jobs are enabled when promoting to a primary node. + # Applying the default config without `status` attributes won't enable previously disabled jobs. + def enable_all_jobs! + enable_jobs!(all_jobs) + end + private def configure_primary - disable!(jobs(SECONDARY_GEO_JOBS)) - enable!(all_jobs(except: SECONDARY_GEO_JOBS)) + disable!(SECONDARY_GEO_JOBS) + + if Feature.enabled?(:stop_bulk_sidekiq_job_activation) # rubocop:disable Gitlab/FeatureFlagWithoutActor -- No actor needed + enable!(GEO_ALWAYS_ENABLED_JOBS + PRIMARY_GEO_JOBS) + else + enable_all_except!(SECONDARY_GEO_JOBS) + end end def configure_secondary - names = [CONFIG_WATCHER, COMMON_GEO_JOBS, SECONDARY_GEO_JOBS, COMMON_GEO_AND_NON_GEO_JOBS].flatten + names = GEO_ALWAYS_ENABLED_JOBS + SECONDARY_GEO_JOBS - disable!(all_jobs(except: names)) - enable!(jobs(names)) + disable_all_except!(names) + enable!(names) end - def enable!(jobs) - jobs.compact.each { |job| job.enable! unless job.enabled? } + def configure_non_geo_site + disable!(GEO_JOBS) + + if Feature.enabled?(:stop_bulk_sidekiq_job_activation) # rubocop:disable Gitlab/FeatureFlagWithoutActor -- No actor needed + enable!(COMMON_GEO_AND_NON_GEO_JOBS) + else + enable_all_except!(GEO_JOBS) + end end - def disable!(jobs) - jobs.compact.each { |job| job.disable! unless job.disabled? } + def enable!(names) + enable_jobs!(jobs(names)) end - def all_jobs(except: []) - SidekiqSharding::Validator.allow_unrouted_sidekiq_calls do - Sidekiq::Cron::Job.all.reject { |job| except.include?(job.name) } - end + def disable!(names) + disable_jobs!(jobs(names)) + end + + def enable_all_except!(names) + enable_jobs!(all_jobs_except(names)) + end + + def disable_all_except!(names) + disable_jobs!(all_jobs_except(names)) + end + + def enable_jobs!(jobs) + jobs.each { |job| job.enable! unless job.enabled? } + end + + def disable_jobs!(jobs) + jobs.each { |job| job.disable! unless job.disabled? } + end + + def all_jobs_except(names = []) + all_jobs.reject { |job| names.include?(job.name) } + end + + def all_jobs + SidekiqSharding::Validator.allow_unrouted_sidekiq_calls { Sidekiq::Cron::Job.all } end def jobs(names) - names.map { |name| job(name) } + names.filter_map { |name| job(name) } end def job(name) diff --git a/ee/lib/gitlab/geo/geo_tasks.rb b/ee/lib/gitlab/geo/geo_tasks.rb index 9c53d64a14395..eb4de2508f462 100644 --- a/ee/lib/gitlab/geo/geo_tasks.rb +++ b/ee/lib/gitlab/geo/geo_tasks.rb @@ -31,11 +31,21 @@ def set_secondary_as_primary primary_node.destroy current_node.update!(primary: true, enabled: true) + initialize_cron_jobs + puts Rainbow("#{current_node.url} is now the primary Geo site").green end end end + # On secondary nodes, most jobs are disabled. When we promote, we aim to use the default job configuration. + # To achieve this, we first need to enable all jobs, as many of them lack a defined `status` and cannot override + # the `status` `disabled`. Once all jobs are enabled, we can apply the configuration and proceed with the setup. + def initialize_cron_jobs + Gitlab::Geo::CronManager.new.enable_all_jobs! + Gitlab::SidekiqConfig::CronJobInitializer.execute + end + def update_primary_geo_node_url node = Gitlab::Geo.primary_node diff --git a/ee/spec/lib/gitlab/geo/cron_manager_spec.rb b/ee/spec/lib/gitlab/geo/cron_manager_spec.rb index 3a8d952ffe350..a6297d3d4b1dd 100644 --- a/ee/spec/lib/gitlab/geo/cron_manager_spec.rb +++ b/ee/spec/lib/gitlab/geo/cron_manager_spec.rb @@ -6,8 +6,7 @@ RSpec.describe Gitlab::Geo::CronManager, :geo, :allow_unrouted_sidekiq_calls, feature_category: :geo_replication do include ::EE::GeoHelpers - jobs = %w[ - ldap_test + geo_jobs = %w[ repository_check_worker geo_registry_sync_worker geo_repository_registry_sync_worker @@ -18,10 +17,21 @@ geo_sync_timeout_cron_worker ].freeze + non_geo_jobs = %w[ldap_test] + def job(name) Sidekiq::Cron::Job.find(name) end + def init_cron_job(job_name, class_name, status: 'enabled') + Sidekiq::Cron::Job.new( + name: job_name, + cron: '0 * * * *', + class: class_name, + status: status + ).save # rubocop:disable Rails/SaveBang -- No ActiveRecord + end + subject(:manager) { described_class.new } describe '#execute' do @@ -32,7 +42,6 @@ def job(name) let(:ldap_test_job) { job('ldap_test') } let(:primary_jobs) { [job('geo_prune_event_log_worker')] } let(:repository_check_job) { job('repository_check_worker') } - let(:secondary_jobs) do [ job('geo_registry_sync_worker'), @@ -43,21 +52,12 @@ def job(name) end before_all do - jobs.each { |name| init_cron_job(name, name.camelize) } + geo_jobs.each { |name| init_cron_job(name, name.camelize) } + non_geo_jobs.each { |name| init_cron_job(name, name.camelize, status: 'disabled') } end after(:all) do - jobs.each { |name| job(name)&.destroy } # rubocop: disable Rails/SaveBang - end - - def init_cron_job(job_name, class_name) - job = Sidekiq::Cron::Job.new( - name: job_name, - cron: '0 * * * *', - class: class_name - ) - - job.enable! + (geo_jobs + non_geo_jobs).each { |name| job(name)&.destroy } # rubocop: disable Rails/SaveBang -- No ActiveRecord end def count_enabled(jobs) @@ -87,19 +87,34 @@ def count_enabled(jobs) expect(repository_check_job).to be_enabled end - it 'enables non-geo jobs' do - expect(ldap_test_job).to be_enabled + context 'with enabled feature flag stop_bulk_sidekiq_job_activation' do + it 'does not enable non-geo jobs' do + expect(ldap_test_job).not_to be_enabled + end end context 'No connection' do it 'does not change current job configuration' do allow(Geo).to receive(:connected?).and_return(false) - expect { manager.execute }.not_to change { count_enabled(jobs) } + expect { manager.execute }.not_to change { count_enabled(geo_jobs + non_geo_jobs) } end end end + context 'with geo primary and disabled feature flag stop_bulk_sidekiq_job_activation' do + before do + stub_feature_flags(stop_bulk_sidekiq_job_activation: false) + stub_current_geo_node(primary) + + manager.execute + end + + it 'enables non-geo jobs' do + expect(ldap_test_job).to be_enabled + end + end + context 'on a Geo secondary' do before do stub_current_geo_node(secondary) @@ -151,6 +166,21 @@ def count_enabled(jobs) expect(repository_check_job).to be_enabled end + context 'with enabled feature flag stop_bulk_sidekiq_job_activation' do + it 'does not enable non-geo jobs' do + expect(ldap_test_job).not_to be_enabled + end + end + end + + context 'with non geo and disabled feature flag stop_bulk_sidekiq_job_activation' do + before do + stub_feature_flags(stop_bulk_sidekiq_job_activation: false) + stub_current_geo_node(nil) + + manager.execute + end + it 'enables non-geo jobs' do expect(ldap_test_job).to be_enabled end @@ -169,4 +199,22 @@ def count_enabled(jobs) expect(created.name).to eq('geo_sidekiq_cron_config_worker') end end + + describe '#enable_all_jobs!' do + name = "job" + + before do + init_cron_job(name, name.camelize, status: 'disabled') + end + + after(:all) do + job(name).destroy # rubocop: disable Rails/SaveBang -- No ActiveRecord + end + + it 'enables all jobs' do + manager.enable_all_jobs! + + expect(job(name)).to be_enabled + end + end end diff --git a/ee/spec/lib/gitlab/geo/geo_tasks_spec.rb b/ee/spec/lib/gitlab/geo/geo_tasks_spec.rb index f36e6863d929e..bc8be7cc11128 100644 --- a/ee/spec/lib/gitlab/geo/geo_tasks_spec.rb +++ b/ee/spec/lib/gitlab/geo/geo_tasks_spec.rb @@ -27,6 +27,12 @@ let(:secondary) { create(:geo_node) } + let(:execute) do + Gitlab::SidekiqSharding::Validator.allow_unrouted_sidekiq_calls do + subject.set_secondary_as_primary + end + end + before do stub_current_geo_node(secondary) stub_current_node_name(secondary.name) @@ -37,7 +43,7 @@ expect(subject).to receive(:abort).with('The primary Geo site is not set').and_raise('aborted') - expect { subject.set_secondary_as_primary }.to raise_error('aborted') + expect { execute }.to raise_error('aborted') end it 'aborts if current node is not identified' do @@ -45,7 +51,7 @@ expect(subject).to receive(:abort).with('Current node is not identified').and_raise('aborted') - expect { subject.set_secondary_as_primary }.to raise_error('aborted') + expect { execute }.to raise_error('aborted') end it 'does nothing if run on a node that is not a secondary' do @@ -54,7 +60,7 @@ expect(subject).not_to receive(:abort) - expect { subject.set_secondary_as_primary }.to output(/#{secondary.url} is already the primary Geo site/).to_stdout + expect { execute }.to output(/#{secondary.url} is already the primary Geo site/).to_stdout expect(secondary.reload).to be_primary expect(primary.reload).to be_secondary end @@ -62,7 +68,7 @@ it 'sets the secondary as the primary node' do expect(subject).not_to receive(:abort) - expect { subject.set_secondary_as_primary }.to output(/#{secondary.url} is now the primary Geo site/).to_stdout + expect { execute }.to output(/#{secondary.url} is now the primary Geo site/).to_stdout expect(secondary.reload).to be_primary end @@ -71,10 +77,49 @@ expect(subject).not_to receive(:abort) - expect { subject.set_secondary_as_primary }.to output(/#{secondary.url} is now the primary Geo site/).to_stdout + expect { execute }.to output(/#{secondary.url} is now the primary Geo site/).to_stdout expect(secondary.reload).to be_primary expect(secondary.reload).to be_enabled end + + it 'loads configuration for all cron jobs' do + expect(Sidekiq::Cron::Job).to receive(:load_from_hash!).and_call_original + + execute + end + + it 'enables all cron jobs' do + cronjob1 = instance_double(Sidekiq::Cron::Job, enabled?: false, name: 'laser', disabled?: true) + cronjob2 = instance_double(Sidekiq::Cron::Job, enabled?: false, name: 'void', disabled?: true) + allow(Sidekiq::Cron::Job).to receive(:all).and_return([cronjob1, cronjob2]) + allow(Sidekiq::Cron::Job).to receive(:load_from_hash!) + expect(cronjob1).to receive(:enable!) + expect(cronjob2).to receive(:enable!) + + execute + end + + context 'when EE files are available', if: Gitlab.ee? do + it 'configures mirror and geo cron jobs' do + expect(Gitlab::Mirror).to receive(:configure_cron_job!) + expect(Gitlab::Geo).to receive(:configure_cron_jobs!) + + execute + end + + context 'for FOSS' do + before do + allow(GitlabEdition).to receive(:ee?).and_return(false) + end + + it 'does not configure mirror and geo cron jobs' do + expect(Gitlab::Mirror).not_to receive(:configure_cron_job!) + expect(Gitlab::Geo).not_to receive(:configure_cron_jobs!) + + execute + end + end + end end describe '.enable_maintenance_mode' do diff --git a/ee/spec/tasks/geo_rake_spec.rb b/ee/spec/tasks/geo_rake_spec.rb index c2a508f2119f4..a471d9b1e88a2 100644 --- a/ee/spec/tasks/geo_rake_spec.rb +++ b/ee/spec/tasks/geo_rake_spec.rb @@ -41,6 +41,11 @@ describe 'geo:set_secondary_as_primary', :use_clean_rails_memory_store_caching do let!(:current_node) { create(:geo_node) } let!(:primary_node) { create(:geo_node, :primary) } + let(:execute) do + Gitlab::SidekiqSharding::Validator.allow_unrouted_sidekiq_calls do + run_rake_task('geo:set_secondary_as_primary') + end + end before do stub_current_geo_node(current_node) @@ -52,7 +57,7 @@ # Pre-warming the cache. See https://gitlab.com/gitlab-org/gitlab/-/merge_requests/22021 Gitlab::Geo.primary_node - run_rake_task('geo:set_secondary_as_primary') + execute expect(current_node.primary?).to be_truthy expect(GeoNode.count).to eq(1) @@ -62,7 +67,7 @@ it 'enables silent mode' do stub_env('ENABLE_SILENT_MODE' => true) - expect { run_rake_task('geo:set_secondary_as_primary') } + expect { execute } .to change { Gitlab::CurrentSettings.silent_mode_enabled? } .from(false) .to(true) @@ -73,7 +78,7 @@ it 'does not enable silent mode' do stub_env('ENABLE_SILENT_MODE' => nil) - expect { run_rake_task('geo:set_secondary_as_primary') } + expect { execute } .not_to change { Gitlab::CurrentSettings.silent_mode_enabled? } end end diff --git a/lib/gitlab/sidekiq_config/cron_job_initializer.rb b/lib/gitlab/sidekiq_config/cron_job_initializer.rb new file mode 100644 index 0000000000000..3f6d5317b5971 --- /dev/null +++ b/lib/gitlab/sidekiq_config/cron_job_initializer.rb @@ -0,0 +1,30 @@ +# frozen_string_literal: true + +module Gitlab + module SidekiqConfig + class CronJobInitializer + class << self + # We apply Sidekiq job configurations for example during Rails initialization. Jobs have a `status` attribute + # with one of following values: + # - `nil`: Job is enabled. + # - `enabled`: Job is enabled. + # - `disabled`: Job is disabled. + # Reapplying configurations with `nil` status won't update a status of `enabled` or `disabled`. + # After applying the defaults, jobs are disabled or setup up based on the node type (e.g., non-geo, + # primary geo, or secondary geo). + + def execute + # Set source to schedule to clear any missing jobs + # See https://github.com/sidekiq-cron/sidekiq-cron/pull/431 + Sidekiq::Cron::Job.load_from_hash! Gitlab::SidekiqConfig.cron_jobs, source: 'schedule' + + Gitlab.ee do + Gitlab::Mirror.configure_cron_job! + + Gitlab::Geo.configure_cron_jobs! + end + end + end + end + end +end diff --git a/spec/initializers/sidekiq_spec.rb b/spec/lib/gitlab/sidekiq_config/cron_job_initializer_spec.rb similarity index 65% rename from spec/initializers/sidekiq_spec.rb rename to spec/lib/gitlab/sidekiq_config/cron_job_initializer_spec.rb index c489b7ea52699..42027e26b911d 100644 --- a/spec/initializers/sidekiq_spec.rb +++ b/spec/lib/gitlab/sidekiq_config/cron_job_initializer_spec.rb @@ -2,9 +2,9 @@ require 'spec_helper' -RSpec.describe 'sidekiq', feature_category: :build do - describe 'load_cron_jobs!' do - subject { load_cron_jobs! } +RSpec.describe Gitlab::SidekiqConfig::CronJobInitializer, feature_category: :build do + describe '#execute' do + subject(:execute) { described_class.execute } let(:cron_for_service_ping) { '4 7 * * 4' } @@ -42,23 +42,40 @@ original_settings = Gitlab.config['cron_jobs'] Gitlab.config['cron_jobs'] = cron_jobs_settings - example.run + Gitlab::SidekiqSharding::Validator.allow_unrouted_sidekiq_calls do + example.run + end Gitlab::SidekiqConfig.clear_memoization(:cron_jobs) Gitlab.config['cron_jobs'] = original_settings end it 'loads the cron jobs into sidekiq-cron' do - allow(Settings).to receive(:cron_for_service_ping).and_return(cron_for_service_ping) - expect(Sidekiq::Cron::Job).to receive(:load_from_hash!).with(cron_jobs_hash, source: 'schedule') - if Gitlab.ee? + execute + end + + context 'when EE files are available', if: Gitlab.ee? do + it 'configures mirror and geo cron jobs' do expect(Gitlab::Mirror).to receive(:configure_cron_job!) expect(Gitlab::Geo).to receive(:configure_cron_jobs!) + + execute end - subject + context 'for FOSS' do + before do + allow(GitlabEdition).to receive(:ee?).and_return(false) + end + + it 'does not configure mirror and geo cron jobs' do + expect(Gitlab::Mirror).not_to receive(:configure_cron_job!) + expect(Gitlab::Geo).not_to receive(:configure_cron_jobs!) + + execute + end + end end end end -- GitLab