# Patch summary (text before the first `diff --git` header is ignored by patch tools).
#
# lib/gitlab/database/reindexing/index_selection.rb
#   * Adds the frozen constant VERY_LARGE_TABLES, currently listing only :ci_builds.
#   * Moves the candidate filtering out of `indexes` into the new method
#     `relations_that_need_cleaning_before_deadline`. It keeps the existing filters
#     (`not_recently_reindexed`, `ondisk_size_bytes >= INDEX_SIZE_MINIMUM`) and, when
#     `too_late_for_very_large_table?` is true, also excludes rows whose `tablename`
#     is in VERY_LARGE_TABLES.
#   * Adds `too_late_for_very_large_table?`, which returns `Date.today.sunday?`.
#     `indexes` still sorts by `relative_bloat_level`, reverses, and keeps candidates
#     at or above MINIMUM_RELATIVE_BLOAT.
#
# spec/lib/gitlab/database/reindexing/index_selection_spec.rb
#   * Adds a 'with restricted tables' context that creates a 100 GB index on
#     `ci_builds` with a 20 GB bloat estimate. The index is expected to be selected
#     when time-travelled to 2022-12-17 (Saturday) and not selected when
#     time-travelled to 2022-12-18 (Sunday).
#
# NOTE(review): `Date.today` presumably follows the process/system time zone rather
#   than the application's configured zone - confirm whether `Date.current` is intended.
# NOTE(review): the added comment's wording "could span during Monday" presumably
#   means "could run over into Monday" - consider rephrasing in the source file.
# NOTE(review): the line breaks of this diff appear to have been collapsed; the hunks
#   likely need their original line structure restored before the patch can be applied.
diff --git a/lib/gitlab/database/reindexing/index_selection.rb b/lib/gitlab/database/reindexing/index_selection.rb index 2d384f2f9e23e0452cdb6300e26c1c183a48f5cd..f9874626105bc93d52bc600571c010c35966c6e8 100644 --- a/lib/gitlab/database/reindexing/index_selection.rb +++ b/lib/gitlab/database/reindexing/index_selection.rb @@ -12,6 +12,10 @@ class IndexSelection # Only consider indexes beyond this size (before reindexing) INDEX_SIZE_MINIMUM = 1.gigabyte + VERY_LARGE_TABLES = %i[ + ci_builds + ].freeze + delegate :each, to: :indexes def initialize(candidates) @@ -30,13 +34,24 @@ def indexes # we force a N+1 pattern here and estimate bloat on a per-index # basis. - @indexes ||= candidates - .not_recently_reindexed - .where('ondisk_size_bytes >= ?', INDEX_SIZE_MINIMUM) + @indexes ||= relations_that_need_cleaning_before_deadline .sort_by(&:relative_bloat_level) # forced N+1 .reverse .select { |candidate| candidate.relative_bloat_level >= MINIMUM_RELATIVE_BLOAT } end + + def relations_that_need_cleaning_before_deadline + relation = candidates.not_recently_reindexed.where('ondisk_size_bytes >= ?', INDEX_SIZE_MINIMUM) + relation = relation.where.not(tablename: VERY_LARGE_TABLES) if too_late_for_very_large_table? + relation + end + + # The reindexing process takes place during the weekends and starting a + # reindexing action on a large table late on Sunday could span during + # Monday. We don't want this because it prevents vacuum from running. + def too_late_for_very_large_table? + Date.today.sunday? 
+ end end end end diff --git a/spec/lib/gitlab/database/reindexing/index_selection_spec.rb b/spec/lib/gitlab/database/reindexing/index_selection_spec.rb index 2ae9037959d4764157fcbd8a2e44f18de561e0ce..9f31716ab9485ebd709acd8723602ad1ff018578 100644 --- a/spec/lib/gitlab/database/reindexing/index_selection_spec.rb +++ b/spec/lib/gitlab/database/reindexing/index_selection_spec.rb @@ -74,4 +74,22 @@ def execute(sql) expect(subject.map(&:name).sort).to eq(not_recently_reindexed.map(&:name).sort) end end + + context 'with restricted tables' do + let!(:ci_builds) do + create( + :postgres_index_bloat_estimate, + index: create(:postgres_index, ondisk_size_bytes: 100.gigabytes, tablename: 'ci_builds'), + bloat_size_bytes: 20.gigabyte + ) + end + + context 'when executed on Saturdays', time_travel_to: '2022-12-17T09:44:07Z' do + it { expect(subject).to include(ci_builds.index) } + end + + context 'when executed on Sundays', time_travel_to: '2022-12-18T09:44:07Z' do + it { expect(subject).not_to include(ci_builds.index) } + end + end end