From 7791ff738e952f5d95b885c6f1cb37cd79e158a8 Mon Sep 17 00:00:00 2001 From: Siddharth Dungarwal <sdungarwal@gitlab.com> Date: Wed, 15 Feb 2023 18:09:07 +0000 Subject: [PATCH] Search using traversal_ids for wiki_blobs and blobs --- ...icsearch_use_traversal_id_optimization.yml | 8 ++++ .../elastic/latest/application_class_proxy.rb | 4 +- ee/lib/elastic/latest/git_class_proxy.rb | 25 +++++++++++- ee/lib/elastic/latest/issue_class_proxy.rb | 2 +- ee/lib/gitlab/elastic/group_search_results.rb | 3 +- ee/lib/gitlab/elastic/search_results.rb | 8 ++-- .../elastic/latest/git_class_proxy_spec.rb | 40 ++++++++++++++++++- ee/spec/services/search/group_service_spec.rb | 11 +++++ 8 files changed, 92 insertions(+), 9 deletions(-) create mode 100644 config/feature_flags/development/elasticsearch_use_traversal_id_optimization.yml diff --git a/config/feature_flags/development/elasticsearch_use_traversal_id_optimization.yml b/config/feature_flags/development/elasticsearch_use_traversal_id_optimization.yml new file mode 100644 index 0000000000000..499da142527ef --- /dev/null +++ b/config/feature_flags/development/elasticsearch_use_traversal_id_optimization.yml @@ -0,0 +1,8 @@ +--- +name: elasticsearch_use_traversal_id_optimization +introduced_by_url: https://gitlab.com/gitlab-org/gitlab/-/merge_requests/108135 +rollout_issue_url: https://gitlab.com/gitlab-org/gitlab/-/issues/391286 +milestone: '15.9' +type: development +group: group::global search +default_enabled: false diff --git a/ee/lib/elastic/latest/application_class_proxy.rb b/ee/lib/elastic/latest/application_class_proxy.rb index 30aec3007505f..8d5ee587321b2 100644 --- a/ee/lib/elastic/latest/application_class_proxy.rb +++ b/ee/lib/elastic/latest/application_class_proxy.rb @@ -406,7 +406,7 @@ def authorized_namespace_ids(current_user, options = {}) authorized_ids.intersection(options[:group_ids].to_set).to_a end - def ancestry_filter(current_user, namespace_ancestry) + def ancestry_filter(current_user, namespace_ancestry, prefix:) return {} unless current_user return {} if namespace_ancestry.blank? @@ -414,7 +414,7 @@ def ancestry_filter(current_user, namespace_ancestry) filters = namespace_ancestry.map do |namespace_ids| { prefix: { - namespace_ancestry_ids: { + "#{prefix}": { _name: context.name(:descendants), value: namespace_ids } diff --git a/ee/lib/elastic/latest/git_class_proxy.rb b/ee/lib/elastic/latest/git_class_proxy.rb index 0516c3f8f8b5b..b696759f9074d 100644 --- a/ee/lib/elastic/latest/git_class_proxy.rb +++ b/ee/lib/elastic/latest/git_class_proxy.rb @@ -49,6 +49,29 @@ def blob_aggregations(query, options) private + def should_use_project_ids_filter?(options) + return true if options[:project_ids] == :any || options[:group_ids].blank? || Feature.disabled?(:elasticsearch_use_traversal_id_optimization) + + !Elastic::DataMigrationService.migration_has_finished?(:backfill_traversal_ids_to_blobs_and_wiki_blobs) + end + + def authorization_filter(query_hash, options) + return project_ids_filter(query_hash, options) if should_use_project_ids_filter?(options) + + current_user = options[:current_user] + traversal_ids = Namespace.find(authorized_namespace_ids(current_user, options)) + .map(&:elastic_namespace_ancestry) + + return project_ids_filter(query_hash, options) if traversal_ids.blank? + + context.name(:namespace) do + query_hash[:query][:bool][:filter] ||= [] + query_hash[:query][:bool][:filter] << ancestry_filter(current_user, traversal_ids, prefix: :traversal_ids) + end + + query_hash + end + def options_filter_context(type, options) repository_ids = [options[:repository_id]].flatten languages = [options[:language]].flatten @@ -276,7 +299,7 @@ def blob_query(query, type: 'blob', page: 1, per: 20, options: {}) # # Note that `:current_user` might be `nil` for a anonymous user if options.key?(:current_user) - query_hash = context.name(:blob, :authorized) { project_ids_filter(query_hash, options) } + query_hash = context.name(:blob, :authorized) { authorization_filter(query_hash, options) } end # add the document type filter diff --git a/ee/lib/elastic/latest/issue_class_proxy.rb b/ee/lib/elastic/latest/issue_class_proxy.rb index d39f131ce5dca..c426ac46443a6 100644 --- a/ee/lib/elastic/latest/issue_class_proxy.rb +++ b/ee/lib/elastic/latest/issue_class_proxy.rb @@ -105,7 +105,7 @@ def authorization_filter(query_hash, options) context.name(:namespace) do query_hash[:query][:bool][:filter] ||= [] - query_hash[:query][:bool][:filter] << ancestry_filter(current_user, namespace_ancestry) + query_hash[:query][:bool][:filter] << ancestry_filter(current_user, namespace_ancestry, prefix: :namespace_ancestry_ids) end query_hash diff --git a/ee/lib/gitlab/elastic/group_search_results.rb b/ee/lib/gitlab/elastic/group_search_results.rb index 9cfa8035b8793..514f31290f1f0 100644 --- a/ee/lib/gitlab/elastic/group_search_results.rb +++ b/ee/lib/gitlab/elastic/group_search_results.rb @@ -22,8 +22,9 @@ def initialize(current_user, query, limit_project_ids = nil, group:, public_and_ override :scope_options def scope_options(scope) + # group_ids to options for traversal_ids filtering case scope - when :issues + when :issues, :blobs, :wiki_blobs super.merge(group_ids: [group.id]) when :users super.merge(group_id: group.id) diff --git a/ee/lib/gitlab/elastic/search_results.rb b/ee/lib/gitlab/elastic/search_results.rb index 7a2f6558ad6d9..3a2b01034b1b1 100644 --- a/ee/lib/gitlab/elastic/search_results.rb +++ b/ee/lib/gitlab/elastic/search_results.rb @@ -281,6 +281,8 @@ def scope_options(scope) base_options.merge(features: [:issues, :merge_requests]) when :users base_options.merge(admin: current_user&.admin?, routing_disabled: true) # rubocop:disable Cop/UserAdmin + when :blobs + base_options.merge(filters.slice(:language)) else base_options end @@ -335,7 +337,7 @@ def blobs(page: 1, per_page: DEFAULT_PER_PAGE, count_only: false, preload_method query, page: (page || 1).to_i, per: per_page, - options: base_options.merge(count_only: count_only).merge(filters.slice(:language)), + options: scope_options(:blobs).merge(count_only: count_only), preload_method: preload_method ) end @@ -349,7 +351,7 @@ def wiki_blobs(page: 1, per_page: DEFAULT_PER_PAGE, count_only: false) query, page: (page || 1).to_i, per: per_page, - options: base_options.merge(count_only: count_only) + options: scope_options(:wiki_blobs).merge(count_only: count_only) ) end end @@ -368,7 +370,7 @@ def commits(page: 1, per_page: DEFAULT_PER_PAGE, preload_method: nil, count_only query, page: (page || 1).to_i, per_page: per_page, - options: base_options.merge(count_only: count_only), + options: scope_options(:commits).merge(count_only: count_only), preload_method: preload_method ) end diff --git a/ee/spec/lib/elastic/latest/git_class_proxy_spec.rb b/ee/spec/lib/elastic/latest/git_class_proxy_spec.rb index eec7dda31de79..bc3b2ea8c4323 100644 --- a/ee/spec/lib/elastic/latest/git_class_proxy_spec.rb +++ b/ee/spec/lib/elastic/latest/git_class_proxy_spec.rb @@ -2,7 +2,7 @@ require 'spec_helper' -RSpec.describe Elastic::Latest::GitClassProxy, :elastic do +RSpec.describe Elastic::Latest::GitClassProxy, :elastic, feature_category: :global_search do let(:project) { create(:project, :public, :repository) } let(:included_class) { Elastic::Latest::RepositoryClassProxy } @@ -86,4 +86,42 @@ assert_named_queries('doc:is_a:blob', 'blob:match:search_terms') end + + context 'when backfilling migration is incomplete' do + let_it_be(:user) { create(:user) } + + before do + set_elasticsearch_migration_to(:backfill_traversal_ids_to_blobs_and_wiki_blobs, including: false) + stub_feature_flags(elasticsearch_use_traversal_id_optimization: false) + end + + it 'does not use the traversal_id filter' do + expect(Namespace).not_to receive(:find) + subject.elastic_search_as_found_blob('*', options: { current_user: user, group_ids: [1] }) + end + end + + context 'when backfilling migration is complete' do + let_it_be(:user) { create(:user) } + + before do + set_elasticsearch_migration_to(:backfill_traversal_ids_to_blobs_and_wiki_blobs, including: true) + stub_feature_flags(elasticsearch_use_traversal_id_optimization: true) + end + + it 'does not use the traversal_id filter when project_ids are passed' do + expect(Namespace).not_to receive(:find) + subject.elastic_search_as_found_blob('*', options: { current_user: user, project_ids: [1, 2] }) + end + + it 'does not use the traversal_id filter when group_ids are not passed' do + expect(Namespace).not_to receive(:find) + subject.elastic_search_as_found_blob('*', options: { current_user: user }) + end + + it 'uses the traversal_id filter' do + expect(Namespace).to receive(:find).once.and_call_original + subject.elastic_search_as_found_blob('*', options: { current_user: user, group_ids: [1] }) + end + end end diff --git a/ee/spec/services/search/group_service_spec.rb b/ee/spec/services/search/group_service_spec.rb index e554fada9c9ba..0bee2837be431 100644 --- a/ee/spec/services/search/group_service_spec.rb +++ b/ee/spec/services/search/group_service_spec.rb @@ -271,6 +271,17 @@ end it_behaves_like 'search respects visibility' + + context 'elasticsearch_use_group_level_optimization is enabled and migration is completed' do + before do + stub_feature_flags(elasticsearch_use_traversal_id_optimization: true) + set_elasticsearch_migration_to(:backfill_traversal_ids_to_blobs_and_wiki_blobs, including: true) + end + + with_them do + it_behaves_like 'search respects visibility' + end + end end end -- GitLab