diff --git a/ee/app/finders/geo/legacy_project_registry_mismatch_finder.rb b/ee/app/finders/geo/legacy_project_registry_mismatch_finder.rb new file mode 100644 index 0000000000000000000000000000000000000000..1908843e831a3422dcd4f336b6908c1f410219e6 --- /dev/null +++ b/ee/app/finders/geo/legacy_project_registry_mismatch_finder.rb @@ -0,0 +1,51 @@ +# frozen_string_literal: true + +# Finder for retrieving project registries that checksum mismatch +# scoped to a type (repository or wiki) using cross-database joins +# for selective sync. +# +# Basic usage: +# +# Geo::LegacyProjectRegistryMismatchFinder.new(current_node: Gitlab::Geo.current_node, :repository).execute +# +# Valid `type` values are: +# +# * `:repository` +# * `:wiki` +# +# Any other value will be ignored. +module Geo + class LegacyProjectRegistryMismatchFinder < RegistryFinder + def initialize(current_node: nil, type:) + super(current_node: current_node) + @type = type.to_s.to_sym + end + + def execute + if selective_sync? + mismatch_registries_for_selective_sync + else + mismatch_registries + end + end + + private + + attr_reader :type + + def mismatch_registries + Geo::ProjectRegistry.mismatch(type) + end + + # rubocop: disable CodeReuse/ActiveRecord + def mismatch_registries_for_selective_sync + legacy_inner_join_registry_ids( + mismatch_registries, + current_node.projects.pluck(:id), + Geo::ProjectRegistry, + foreign_key: :project_id + ) + end + # rubocop: enable CodeReuse/ActiveRecord + end +end diff --git a/ee/app/finders/geo/project_registry_finder.rb b/ee/app/finders/geo/project_registry_finder.rb index 673a5a0d186a471ed2c169ff1de971a1bd875fab..295662777010a10a36e8a8289fc47a0c23011a58 100644 --- a/ee/app/finders/geo/project_registry_finder.rb +++ b/ee/app/finders/geo/project_registry_finder.rb @@ -47,11 +47,15 @@ def find_verification_failed_project_registries(type = nil) end def count_repositories_checksum_mismatch - Geo::ProjectRegistry.repository_checksum_mismatch.count + registries_for_mismatch_projects(:repository).count end def count_wikis_checksum_mismatch - Geo::ProjectRegistry.wiki_checksum_mismatch.count + registries_for_mismatch_projects(:wiki).count + end + + def find_checksum_mismatch_project_registries(type = nil) + registries_for_mismatch_projects(type) end def count_repositories_retrying_verification @@ -62,14 +66,6 @@ def count_wikis_retrying_verification registries_retrying_verification(:wiki).count end - def find_checksum_mismatch_project_registries(type = nil) - if use_legacy_queries? - legacy_find_filtered_checksum_mismatch_projects(type) - else - find_filtered_checksum_mismatch_project_registries(type) - end - end - # Find all registries that need a repository or wiki verification def find_registries_to_verify(shard_name:, batch_size:) if use_legacy_queries? @@ -107,17 +103,6 @@ def find_projects_updated_recently(batch_size:) protected - def find_filtered_checksum_mismatch_project_registries(type = nil) - case type - when 'repository' - Geo::ProjectRegistry.repository_checksum_mismatch - when 'wiki' - Geo::ProjectRegistry.wiki_checksum_mismatch - else - Geo::ProjectRegistry.checksum_mismatch - end - end - # # FDW accessors # @@ -214,18 +199,6 @@ def quote_value(value) ::Gitlab::SQL::Glob.q(value) end - # @return [ActiveRecord::Relation<Geo::ProjectRegistry>] list of projects where there is a checksum_mismatch - # rubocop: disable CodeReuse/ActiveRecord - def legacy_find_filtered_checksum_mismatch_projects(type = nil) - legacy_inner_join_registry_ids( - find_filtered_checksum_mismatch_project_registries(type), - current_node.projects.pluck(:id), - Geo::ProjectRegistry, - foreign_key: :project_id - ) - end - # rubocop: enable CodeReuse/ActiveRecord - # @return [ActiveRecord::Relation<Geo::ProjectRegistry>] list of registries that need verification # rubocop: disable CodeReuse/ActiveRecord def legacy_find_registries_to_verify(shard_name:, batch_size:) @@ -373,5 +346,19 @@ def registries_retrying_verification(type) .new(current_node: current_node, type: type) .execute end + + def finder_klass_for_mismatch_registries + if Gitlab::Geo::Fdw.enabled_for_selective_sync? + Geo::ProjectRegistryMismatchFinder + else + Geo::LegacyProjectRegistryMismatchFinder + end + end + + def registries_for_mismatch_projects(type) + finder_klass_for_mismatch_registries + .new(current_node: current_node, type: type) + .execute + end end end diff --git a/ee/app/finders/geo/project_registry_mismatch_finder.rb b/ee/app/finders/geo/project_registry_mismatch_finder.rb new file mode 100644 index 0000000000000000000000000000000000000000..700b69ca038a58304965dbdadbe3c79a90370f84 --- /dev/null +++ b/ee/app/finders/geo/project_registry_mismatch_finder.rb @@ -0,0 +1,31 @@ +# frozen_string_literal: true + +# Finder for retrieving project registries that checksum mismatch +# scoped to a type (repository or wiki) using FDW queries. +# +# Basic usage: +# +# Geo::ProjectRegistryMismatchFinder.new(current_node: Gitlab::Geo.current_node, :repository).execute +# +# Valid `type` values are: +# +# * `:repository` +# * `:wiki` +# +# Any other value will be ignored. +module Geo + class ProjectRegistryMismatchFinder + def initialize(current_node:, type:) + @current_node = Geo::Fdw::GeoNode.find(current_node.id) + @type = type.to_s.to_sym + end + + def execute + current_node.project_registries.mismatch(type) + end + + private + + attr_reader :current_node, :type + end +end diff --git a/ee/app/models/geo/project_registry.rb b/ee/app/models/geo/project_registry.rb index b39ebe8c98037c18a059935953a8f3bc8565cbee..ecf7f9fc8be05d3c1515e740c0a2f2373b4630f9 100644 --- a/ee/app/models/geo/project_registry.rb +++ b/ee/app/models/geo/project_registry.rb @@ -138,6 +138,17 @@ def self.retrying_verification(type) end end + def self.mismatch(type) + case type + when :repository + repository_checksum_mismatch + when :wiki + wiki_checksum_mismatch + else + repository_checksum_mismatch.or(wiki_checksum_mismatch) + end + end + def self.flag_repositories_for_resync! update_all( resync_repository: true, diff --git a/ee/changelogs/unreleased/8798-geo-implement-selective-sync-support-for-fdw-queries-to-find-mismatch-registries.yml b/ee/changelogs/unreleased/8798-geo-implement-selective-sync-support-for-fdw-queries-to-find-mismatch-registries.yml new file mode 100644 index 0000000000000000000000000000000000000000..248e3069af0b1e921f704e14dc720886ebb37854 --- /dev/null +++ b/ee/changelogs/unreleased/8798-geo-implement-selective-sync-support-for-fdw-queries-to-find-mismatch-registries.yml @@ -0,0 +1,5 @@ +--- +title: Geo - Add selective sync support for the FDW queries to find mismatch registries +merge_request: 10434 +author: +type: changed diff --git a/ee/spec/finders/geo/legacy_project_registry_mismatch_finder_spec.rb b/ee/spec/finders/geo/legacy_project_registry_mismatch_finder_spec.rb new file mode 100644 index 0000000000000000000000000000000000000000..273a7fb79070d1b915591fe67dba5ddd15146e3e --- /dev/null +++ b/ee/spec/finders/geo/legacy_project_registry_mismatch_finder_spec.rb @@ -0,0 +1,123 @@ +# frozen_string_literal: true + +require 'spec_helper' + +describe Geo::LegacyProjectRegistryMismatchFinder, :geo do + include EE::GeoHelpers + + describe '#execute' do + let(:node) { create(:geo_node) } + let(:group_1) { create(:group) } + let(:group_2) { create(:group) } + let(:nested_group_1) { create(:group, parent: group_1) } + let(:project_1) { create(:project, group: group_1) } + let(:project_2) { create(:project, group: nested_group_1) } + let(:project_3) { create(:project, group: nested_group_1) } + let(:project_4) { create(:project, :broken_storage, group: group_2) } + let(:project_5) { create(:project, :broken_storage, group: group_2) } + let!(:registry_mismatch) { create(:geo_project_registry, :repository_checksum_mismatch, :wiki_checksum_mismatch, project: project_1) } + let!(:registry_repository_mismatch) { create(:geo_project_registry, :repository_checksum_mismatch, :wiki_verified, project: project_2) } + let!(:registry_wiki_mismatch) { create(:geo_project_registry, :repository_verified, :wiki_checksum_mismatch, project: project_3) } + let!(:registry_wiki_mismatch_broken_shard) { create(:geo_project_registry, :repository_verified, :wiki_checksum_mismatch, project: project_4) } + let!(:registry_repository_mismatch_broken_shard) { create(:geo_project_registry, :repository_checksum_mismatch, :wiki_verified, project: project_5) } + let!(:registry_verified) { create(:geo_project_registry, :repository_verified, :wiki_verified) } + + shared_examples 'finds mismatch registries' do + context 'with repository type' do + subject { described_class.new(current_node: node, type: :repository) } + + context 'without selective sync' do + it 'returns all mismatch registries' do + expect(subject.execute).to contain_exactly(registry_mismatch, registry_repository_mismatch, registry_repository_mismatch_broken_shard) + end + end + + context 'with selective sync by namespace' do + it 'returns mismatch registries where projects belongs to the namespaces' do + node.update!(selective_sync_type: 'namespaces', namespaces: [group_1, nested_group_1]) + + expect(subject.execute).to contain_exactly(registry_mismatch, registry_repository_mismatch) + end + end + + context 'with selective sync by shard' do + it 'returns mismatch registries where projects belongs to the shards' do + node.update!(selective_sync_type: 'shards', selective_sync_shards: ['broken']) + + expect(subject.execute).to contain_exactly(registry_repository_mismatch_broken_shard) + end + end + end + + context 'with wiki type' do + subject { described_class.new(current_node: node, type: :wiki) } + + context 'without selective sync' do + it 'returns all mismatch registries' do + expect(subject.execute).to contain_exactly(registry_mismatch, registry_wiki_mismatch, registry_wiki_mismatch_broken_shard) + end + end + + context 'with selective sync by namespace' do + it 'returns mismatch registries where projects belongs to the namespaces' do + node.update!(selective_sync_type: 'namespaces', namespaces: [group_1, nested_group_1]) + + expect(subject.execute).to contain_exactly(registry_mismatch, registry_wiki_mismatch) + end + end + + context 'with selective sync by shard' do + it 'returns mismatch registries where projects belongs to the shards' do + node.update!(selective_sync_type: 'shards', selective_sync_shards: ['broken']) + + expect(subject.execute).to contain_exactly(registry_wiki_mismatch_broken_shard) + end + end + end + + context 'with invalid type' do + subject { described_class.new(current_node: node, type: :invalid) } + + context 'without selective sync' do + it 'returns all mismatch registries' do + expect(subject.execute).to contain_exactly(registry_mismatch, registry_repository_mismatch, registry_wiki_mismatch, registry_repository_mismatch_broken_shard, registry_wiki_mismatch_broken_shard) + end + end + + context 'with selective sync by namespace' do + it 'returns all mismatch registries where projects belongs to the namespaces' do + node.update!(selective_sync_type: 'namespaces', namespaces: [group_1, nested_group_1]) + + expect(subject.execute).to contain_exactly(registry_mismatch, registry_repository_mismatch, registry_wiki_mismatch) + end + end + + context 'with selective sync by shard' do + it 'returns all mismatch registries where projects belongs to the shards' do + node.update!(selective_sync_type: 'shards', selective_sync_shards: ['broken']) + + expect(subject.execute).to contain_exactly(registry_repository_mismatch_broken_shard, registry_wiki_mismatch_broken_shard) + end + end + end + end + + # Disable transactions via :delete method because a foreign table + # can't see changes inside a transaction of a different connection. + context 'FDW', :delete do + before do + skip('FDW is not configured') unless Gitlab::Geo::Fdw.enabled? + end + + include_examples 'finds mismatch registries' + end + + context 'Legacy' do + before do + stub_fdw_disabled + end + + include_examples 'finds mismatch registries' + end + end +end diff --git a/ee/spec/finders/geo/project_registry_finder_spec.rb b/ee/spec/finders/geo/project_registry_finder_spec.rb index f8eaf6cc8049e6f0c73c51df7290b90851cf9a8b..782aedca9da2a7f1d501c822127dd44be63f25c5 100644 --- a/ee/spec/finders/geo/project_registry_finder_spec.rb +++ b/ee/spec/finders/geo/project_registry_finder_spec.rb @@ -174,15 +174,6 @@ expect(subject.count_verification_failed_repositories).to eq 1 end - - it 'counts projects that verification has failed' do - create(:geo_project_registry, :repository_verified, project: project_repository_verified) - create(:geo_project_registry, :repository_verification_failed, project: project_repository_verification_failed) - create(:geo_project_registry, :wiki_verified, project: project_wiki_verified) - create(:geo_project_registry, :wiki_verification_failed, project: project_wiki_verification_failed) - - expect(subject.count_verification_failed_repositories).to eq 1 - end end describe '#count_verification_failed_wikis' do @@ -245,60 +236,50 @@ end end end - end - - describe '#find_checksum_mismatch_project_registries' do - it 'delegates to #find_filtered_checksum_mismatch_project_registries' do - expect(subject).to receive(:find_filtered_checksum_mismatch_project_registries).and_call_original - - subject.find_checksum_mismatch_project_registries - end - - it 'delegates to #legacy_find_filtered_checksum_mismatch_projects when use_legacy_queries is true' do - expect(subject).to receive(:use_legacy_queries?).and_return(true) - - expect(subject).to receive(:legacy_find_filtered_checksum_mismatch_projects).and_call_original - - subject.find_checksum_mismatch_project_registries - end - it 'delegates to #find_filtered_checksum_mismatch_project_registries when use_legacy_queries is false' do - expect(subject).to receive(:use_legacy_queries?).and_return(false) + describe '#count_repositories_checksum_mismatch' do + let(:project_1_in_synced_group) { create(:project, group: synced_group) } + let(:project_2_in_synced_group) { create(:project, group: synced_group) } - expect(subject).to receive(:find_filtered_checksum_mismatch_project_registries).and_call_original + let!(:registry_mismatch) { create(:geo_project_registry, :repository_checksum_mismatch, :wiki_checksum_mismatch, project: project_synced) } + let!(:repository_mismatch) { create(:geo_project_registry, :repository_checksum_mismatch, project: project_1_in_synced_group) } + let!(:wiki_mismatch) { create(:geo_project_registry, :wiki_checksum_mismatch, project: project_2_in_synced_group) } - subject.find_checksum_mismatch_project_registries - end + it 'counts registries that repository mismatch' do + expect(subject.count_repositories_checksum_mismatch).to eq 2 + end - it 'counts projects with a checksum mismatch' do - repository_mismatch1 = create(:geo_project_registry, :repository_checksum_mismatch) - repository_mismatch2 = create(:geo_project_registry, :repository_checksum_mismatch) - create(:geo_project_registry, :wiki_verified) - wiki_mismatch = create(:geo_project_registry, :wiki_checksum_mismatch) + context 'with selective sync' do + before do + secondary.update!(selective_sync_type: 'namespaces', namespaces: [synced_group]) + end - expect(subject.find_checksum_mismatch_project_registries('wiki')).to match_array(wiki_mismatch) - expect(subject.find_checksum_mismatch_project_registries('repository')).to match_array([repository_mismatch1, repository_mismatch2]) - expect(subject.find_checksum_mismatch_project_registries(nil)).to match_array([repository_mismatch1, repository_mismatch2, wiki_mismatch]) + it 'counts projects that sync has failed' do + expect(subject.count_repositories_checksum_mismatch).to eq 1 + end + end end - context 'with selective sync' do - before do - secondary.update!(selective_sync_type: 'namespaces', namespaces: [synced_group]) - end + describe '#count_wikis_checksum_mismatch' do + let(:project_1_in_synced_group) { create(:project, group: synced_group) } + let(:project_2_in_synced_group) { create(:project, group: synced_group) } - it 'delegates to #legacy_find_filtered_checksum_mismatch_projects' do - expect(subject).to receive(:legacy_find_filtered_checksum_mismatch_projects).and_call_original + let!(:registry_mismatch) { create(:geo_project_registry, :repository_checksum_mismatch, :wiki_checksum_mismatch, project: project_synced) } + let!(:repository_mismatch) { create(:geo_project_registry, :repository_checksum_mismatch, project: project_1_in_synced_group) } + let!(:wiki_mismatch) { create(:geo_project_registry, :wiki_checksum_mismatch, project: project_2_in_synced_group) } - subject.find_checksum_mismatch_project_registries + it 'counts projects that verification has failed' do + expect(subject.count_wikis_checksum_mismatch).to eq 2 end - it 'returns projects with a checksum mismatch' do - project_1_in_synced_group = create(:project, group: synced_group) - project_1_registry_record = create(:geo_project_registry, :repository_checksum_mismatch, project: project_1_in_synced_group) - - projects = subject.find_checksum_mismatch_project_registries('repository') + context 'with selective sync' do + before do + secondary.update!(selective_sync_type: 'namespaces', namespaces: [synced_group]) + end - expect(projects).to match_ids(project_1_registry_record) + it 'counts projects that sync has failed' do + expect(subject.count_wikis_checksum_mismatch).to eq 1 + end end end end @@ -427,6 +408,49 @@ end end + describe '#find_checksum_mismatch_project_registries' do + let(:project_1_in_synced_group) { create(:project, group: synced_group) } + let(:project_2_in_synced_group) { create(:project, group: synced_group) } + + let!(:registry_mismatch) { create(:geo_project_registry, :repository_checksum_mismatch, :wiki_checksum_mismatch, project: project_synced) } + let!(:repository_mismatch) { create(:geo_project_registry, :repository_checksum_mismatch, project: project_1_in_synced_group) } + let!(:wiki_mismatch) { create(:geo_project_registry, :wiki_checksum_mismatch, project: project_2_in_synced_group) } + + it 'returns only project registries that repository mismatch when type is set to repository' do + expect(subject.find_checksum_mismatch_project_registries('repository')).to contain_exactly(registry_mismatch, repository_mismatch) + end + + it 'returns only project registries that repository mismatch when type is set to repository' do + expect(subject.find_checksum_mismatch_project_registries('wiki')).to contain_exactly(registry_mismatch, wiki_mismatch) + end + + it 'returns project registries that repository or wiki mismatch' do + expect(subject.find_checksum_mismatch_project_registries).to contain_exactly(registry_mismatch, repository_mismatch, wiki_mismatch) + end + + context 'with selective sync' do + before do + secondary.update!(selective_sync_type: 'namespaces', namespaces: [synced_group]) + end + + it 'returns only project registries that repository mismatch when type is set to repository' do + create(:geo_project_registry, :repository_checksum_mismatch) + + expect(subject.find_checksum_mismatch_project_registries('repository')).to contain_exactly(repository_mismatch) + end + + it 'returns only project registries that repository mismatch when type is set to repository' do + create(:geo_project_registry, :wiki_checksum_mismatch) + + expect(subject.find_checksum_mismatch_project_registries('wiki')).to contain_exactly(wiki_mismatch) + end + + it 'returns project registries that repository or wiki mismatch' do + expect(subject.find_checksum_mismatch_project_registries).to contain_exactly(repository_mismatch, wiki_mismatch) + end + end + end + describe '#find_registries_to_verify' do it 'delegates to the correct method' do expect(subject).to receive("#{method_prefix}_find_registries_to_verify".to_sym).and_call_original diff --git a/ee/spec/finders/geo/project_registry_mismatch_finder_spec.rb b/ee/spec/finders/geo/project_registry_mismatch_finder_spec.rb new file mode 100644 index 0000000000000000000000000000000000000000..bb0a160eb12915b7038b452d2eeb5c977a149f9c --- /dev/null +++ b/ee/spec/finders/geo/project_registry_mismatch_finder_spec.rb @@ -0,0 +1,107 @@ +# frozen_string_literal: true + +require 'spec_helper' + +describe Geo::ProjectRegistryMismatchFinder, :geo do + # Disable transactions via :delete method because a foreign table + # can't see changes inside a transaction of a different connection. + describe '#execute', :delete do + let(:node) { create(:geo_node) } + let(:group_1) { create(:group) } + let(:group_2) { create(:group) } + let(:nested_group_1) { create(:group, parent: group_1) } + let(:project_1) { create(:project, group: group_1) } + let(:project_2) { create(:project, group: nested_group_1) } + let(:project_3) { create(:project, group: nested_group_1) } + let(:project_4) { create(:project, :broken_storage, group: group_2) } + let(:project_5) { create(:project, :broken_storage, group: group_2) } + let!(:registry_mismatch) { create(:geo_project_registry, :repository_checksum_mismatch, :wiki_checksum_mismatch, project: project_1) } + let!(:registry_repository_mismatch) { create(:geo_project_registry, :repository_checksum_mismatch, :wiki_verified, project: project_2) } + let!(:registry_wiki_mismatch) { create(:geo_project_registry, :repository_verified, :wiki_checksum_mismatch, project: project_3) } + let!(:registry_wiki_mismatch_broken_shard) { create(:geo_project_registry, :repository_verified, :wiki_checksum_mismatch, project: project_4) } + let!(:registry_repository_mismatch_broken_shard) { create(:geo_project_registry, :repository_checksum_mismatch, :wiki_verified, project: project_5) } + let!(:registry_verified) { create(:geo_project_registry, :repository_verified, :wiki_verified) } + + before do + skip('FDW is not configured') unless Gitlab::Geo::Fdw.enabled? + end + + context 'with repository type' do + subject { described_class.new(current_node: node, type: :repository) } + + context 'without selective sync' do + it 'returns all mismatch registries' do + expect(subject.execute).to contain_exactly(registry_mismatch, registry_repository_mismatch, registry_repository_mismatch_broken_shard) + end + end + + context 'with selective sync by namespace' do + it 'returns mismatch registries where projects belongs to the namespaces' do + node.update!(selective_sync_type: 'namespaces', namespaces: [group_1, nested_group_1]) + + expect(subject.execute).to contain_exactly(registry_mismatch, registry_repository_mismatch) + end + end + + context 'with selective sync by shard' do + it 'returns mismatch registries where projects belongs to the shards' do + node.update!(selective_sync_type: 'shards', selective_sync_shards: ['broken']) + + expect(subject.execute).to contain_exactly(registry_repository_mismatch_broken_shard) + end + end + end + + context 'with wiki type' do + subject { described_class.new(current_node: node, type: :wiki) } + + context 'without selective sync' do + it 'returns all mismatch registries' do + expect(subject.execute).to contain_exactly(registry_mismatch, registry_wiki_mismatch, registry_wiki_mismatch_broken_shard) + end + end + + context 'with selective sync by namespace' do + it 'returns mismatch registries where projects belongs to the namespaces' do + node.update!(selective_sync_type: 'namespaces', namespaces: [group_1, nested_group_1]) + + expect(subject.execute).to contain_exactly(registry_mismatch, registry_wiki_mismatch) + end + end + + context 'with selective sync by shard' do + it 'returns mismatch registries where projects belongs to the shards' do + node.update!(selective_sync_type: 'shards', selective_sync_shards: ['broken']) + + expect(subject.execute).to contain_exactly(registry_wiki_mismatch_broken_shard) + end + end + end + + context 'with invalid type' do + subject { described_class.new(current_node: node, type: :invalid) } + + context 'without selective sync' do + it 'returns all mismatch registries' do + expect(subject.execute).to contain_exactly(registry_mismatch, registry_repository_mismatch, registry_wiki_mismatch, registry_repository_mismatch_broken_shard, registry_wiki_mismatch_broken_shard) + end + end + + context 'with selective sync by namespace' do + it 'returns all mismatch registries where projects belongs to the namespaces' do + node.update!(selective_sync_type: 'namespaces', namespaces: [group_1, nested_group_1]) + + expect(subject.execute).to contain_exactly(registry_mismatch, registry_repository_mismatch, registry_wiki_mismatch) + end + end + + context 'with selective sync by shard' do + it 'returns all mismatch registries where projects belongs to the shards' do + node.update!(selective_sync_type: 'shards', selective_sync_shards: ['broken']) + + expect(subject.execute).to contain_exactly(registry_repository_mismatch_broken_shard, registry_wiki_mismatch_broken_shard) + end + end + end + end +end