diff --git a/config/feature_flags/development/elasticsearch_use_traversal_id_optimization.yml b/config/feature_flags/development/elasticsearch_use_traversal_id_optimization.yml new file mode 100644 index 0000000000000000000000000000000000000000..499da142527efd7ca50ae7b5087d6de279d2985c --- /dev/null +++ b/config/feature_flags/development/elasticsearch_use_traversal_id_optimization.yml @@ -0,0 +1,8 @@ +--- +name: elasticsearch_use_traversal_id_optimization +introduced_by_url: https://gitlab.com/gitlab-org/gitlab/-/merge_requests/108135 +rollout_issue_url: https://gitlab.com/gitlab-org/gitlab/-/issues/391286 +milestone: '15.9' +type: development +group: group::global search +default_enabled: false diff --git a/ee/lib/elastic/latest/application_class_proxy.rb b/ee/lib/elastic/latest/application_class_proxy.rb index 30aec3007505f8604c67531519e70d427d6cb7a1..8d5ee587321b22ee4ea189438dcc4e874fd16603 100644 --- a/ee/lib/elastic/latest/application_class_proxy.rb +++ b/ee/lib/elastic/latest/application_class_proxy.rb @@ -406,7 +406,7 @@ def authorized_namespace_ids(current_user, options = {}) authorized_ids.intersection(options[:group_ids].to_set).to_a end - def ancestry_filter(current_user, namespace_ancestry) + def ancestry_filter(current_user, namespace_ancestry, prefix:) return {} unless current_user return {} if namespace_ancestry.blank? @@ -414,7 +414,7 @@ def ancestry_filter(current_user, namespace_ancestry) filters = namespace_ancestry.map do |namespace_ids| { prefix: { - namespace_ancestry_ids: { + "#{prefix}": { _name: context.name(:descendants), value: namespace_ids } diff --git a/ee/lib/elastic/latest/git_class_proxy.rb b/ee/lib/elastic/latest/git_class_proxy.rb index 0516c3f8f8b5b3f6b8deeb9a419c0a170413720b..b696759f9074de19665e21ee0abf63bec48f9dee 100644 --- a/ee/lib/elastic/latest/git_class_proxy.rb +++ b/ee/lib/elastic/latest/git_class_proxy.rb @@ -49,6 +49,29 @@ def blob_aggregations(query, options) private + def should_use_project_ids_filter?(options) + return true if options[:project_ids] == :any || options[:group_ids].blank? || Feature.disabled?(:elasticsearch_use_traversal_id_optimization) + + !Elastic::DataMigrationService.migration_has_finished?(:backfill_traversal_ids_to_blobs_and_wiki_blobs) + end + + def authorization_filter(query_hash, options) + return project_ids_filter(query_hash, options) if should_use_project_ids_filter?(options) + + current_user = options[:current_user] + traversal_ids = Namespace.find(authorized_namespace_ids(current_user, options)) + .map(&:elastic_namespace_ancestry) + + return project_ids_filter(query_hash, options) if traversal_ids.blank? + + context.name(:namespace) do + query_hash[:query][:bool][:filter] ||= [] + query_hash[:query][:bool][:filter] << ancestry_filter(current_user, traversal_ids, prefix: :traversal_ids) + end + + query_hash + end + def options_filter_context(type, options) repository_ids = [options[:repository_id]].flatten languages = [options[:language]].flatten @@ -276,7 +299,7 @@ def blob_query(query, type: 'blob', page: 1, per: 20, options: {}) # # Note that `:current_user` might be `nil` for a anonymous user if options.key?(:current_user) - query_hash = context.name(:blob, :authorized) { project_ids_filter(query_hash, options) } + query_hash = context.name(:blob, :authorized) { authorization_filter(query_hash, options) } end # add the document type filter diff --git a/ee/lib/elastic/latest/issue_class_proxy.rb b/ee/lib/elastic/latest/issue_class_proxy.rb index d39f131ce5dcaf47805bdce1739c3241a38b6492..c426ac46443a61ca9bc7f5875ba61bdcff189d50 100644 --- a/ee/lib/elastic/latest/issue_class_proxy.rb +++ b/ee/lib/elastic/latest/issue_class_proxy.rb @@ -105,7 +105,7 @@ def authorization_filter(query_hash, options) context.name(:namespace) do query_hash[:query][:bool][:filter] ||= [] - query_hash[:query][:bool][:filter] << ancestry_filter(current_user, namespace_ancestry) + query_hash[:query][:bool][:filter] << ancestry_filter(current_user, namespace_ancestry, prefix: :namespace_ancestry_ids) end query_hash diff --git a/ee/lib/gitlab/elastic/group_search_results.rb b/ee/lib/gitlab/elastic/group_search_results.rb index 9cfa8035b87938a2bb2a57381eaeef052c21f2e9..514f31290f1f0b1972809b2a545758b9001095ad 100644 --- a/ee/lib/gitlab/elastic/group_search_results.rb +++ b/ee/lib/gitlab/elastic/group_search_results.rb @@ -22,8 +22,9 @@ def initialize(current_user, query, limit_project_ids = nil, group:, public_and_ override :scope_options def scope_options(scope) + # group_ids to options for traversal_ids filtering case scope - when :issues + when :issues, :blobs, :wiki_blobs super.merge(group_ids: [group.id]) when :users super.merge(group_id: group.id) diff --git a/ee/lib/gitlab/elastic/search_results.rb b/ee/lib/gitlab/elastic/search_results.rb index 7a2f6558ad6d9dde31b53766f5c3783f63a156d5..3a2b01034b1b19f0138a7101d04a0fbb9a53e398 100644 --- a/ee/lib/gitlab/elastic/search_results.rb +++ b/ee/lib/gitlab/elastic/search_results.rb @@ -281,6 +281,8 @@ def scope_options(scope) base_options.merge(features: [:issues, :merge_requests]) when :users base_options.merge(admin: current_user&.admin?, routing_disabled: true) # rubocop:disable Cop/UserAdmin + when :blobs + base_options.merge(filters.slice(:language)) else base_options end @@ -335,7 +337,7 @@ def blobs(page: 1, per_page: DEFAULT_PER_PAGE, count_only: false, preload_method query, page: (page || 1).to_i, per: per_page, - options: base_options.merge(count_only: count_only).merge(filters.slice(:language)), + options: scope_options(:blobs).merge(count_only: count_only), preload_method: preload_method ) end @@ -349,7 +351,7 @@ def wiki_blobs(page: 1, per_page: DEFAULT_PER_PAGE, count_only: false) query, page: (page || 1).to_i, per: per_page, - options: base_options.merge(count_only: count_only) + options: scope_options(:wiki_blobs).merge(count_only: count_only) ) end end @@ -368,7 +370,7 @@ def commits(page: 1, per_page: DEFAULT_PER_PAGE, preload_method: nil, count_only query, page: (page || 1).to_i, per_page: per_page, - options: base_options.merge(count_only: count_only), + options: scope_options(:commits).merge(count_only: count_only), preload_method: preload_method ) end diff --git a/ee/spec/lib/elastic/latest/git_class_proxy_spec.rb b/ee/spec/lib/elastic/latest/git_class_proxy_spec.rb index eec7dda31de7947e2484d282721a7347003ce1fc..bc3b2ea8c4323d631d8e41c5536fa0e50d2c8269 100644 --- a/ee/spec/lib/elastic/latest/git_class_proxy_spec.rb +++ b/ee/spec/lib/elastic/latest/git_class_proxy_spec.rb @@ -2,7 +2,7 @@ require 'spec_helper' -RSpec.describe Elastic::Latest::GitClassProxy, :elastic do +RSpec.describe Elastic::Latest::GitClassProxy, :elastic, feature_category: :global_search do let(:project) { create(:project, :public, :repository) } let(:included_class) { Elastic::Latest::RepositoryClassProxy } @@ -86,4 +86,42 @@ assert_named_queries('doc:is_a:blob', 'blob:match:search_terms') end + + context 'when backfilling migration is incomplete' do + let_it_be(:user) { create(:user) } + + before do + set_elasticsearch_migration_to(:backfill_traversal_ids_to_blobs_and_wiki_blobs, including: false) + stub_feature_flags(elasticsearch_use_traversal_id_optimization: false) + end + + it 'does not use the traversal_id filter' do + expect(Namespace).not_to receive(:find) + subject.elastic_search_as_found_blob('*', options: { current_user: user, group_ids: [1] }) + end + end + + context 'when backfilling migration is complete' do + let_it_be(:user) { create(:user) } + + before do + set_elasticsearch_migration_to(:backfill_traversal_ids_to_blobs_and_wiki_blobs, including: true) + stub_feature_flags(elasticsearch_use_traversal_id_optimization: true) + end + + it 'does not use the traversal_id filter when project_ids are passed' do + expect(Namespace).not_to receive(:find) + subject.elastic_search_as_found_blob('*', options: { current_user: user, project_ids: [1, 2] }) + end + + it 'does not use the traversal_id filter when group_ids are not passed' do + expect(Namespace).not_to receive(:find) + subject.elastic_search_as_found_blob('*', options: { current_user: user }) + end + + it 'uses the traversal_id filter' do + expect(Namespace).to receive(:find).once.and_call_original + subject.elastic_search_as_found_blob('*', options: { current_user: user, group_ids: [1] }) + end + end end diff --git a/ee/spec/services/search/group_service_spec.rb b/ee/spec/services/search/group_service_spec.rb index e554fada9c9bad1e2eb56ce127e332cf2f304bf2..0bee2837be431c2aab19ded1e330f98558e4c876 100644 --- a/ee/spec/services/search/group_service_spec.rb +++ b/ee/spec/services/search/group_service_spec.rb @@ -271,6 +271,17 @@ end it_behaves_like 'search respects visibility' + + context 'elasticsearch_use_group_level_optimization is enabled and migration is completed' do + before do + stub_feature_flags(elasticsearch_use_traversal_id_optimization: true) + set_elasticsearch_migration_to(:backfill_traversal_ids_to_blobs_and_wiki_blobs, including: true) + end + + with_them do + it_behaves_like 'search respects visibility' + end + end end end