From 7791ff738e952f5d95b885c6f1cb37cd79e158a8 Mon Sep 17 00:00:00 2001
From: Siddharth Dungarwal <sdungarwal@gitlab.com>
Date: Wed, 15 Feb 2023 18:09:07 +0000
Subject: [PATCH] Search using traversal_ids for wiki_blobs and blobs

---
 ...icsearch_use_traversal_id_optimization.yml |  8 ++++
 .../elastic/latest/application_class_proxy.rb |  4 +-
 ee/lib/elastic/latest/git_class_proxy.rb      | 25 +++++++++++-
 ee/lib/elastic/latest/issue_class_proxy.rb    |  2 +-
 ee/lib/gitlab/elastic/group_search_results.rb |  3 +-
 ee/lib/gitlab/elastic/search_results.rb       |  8 ++--
 .../elastic/latest/git_class_proxy_spec.rb    | 40 ++++++++++++++++++-
 ee/spec/services/search/group_service_spec.rb | 11 +++++
 8 files changed, 92 insertions(+), 9 deletions(-)
 create mode 100644 config/feature_flags/development/elasticsearch_use_traversal_id_optimization.yml

diff --git a/config/feature_flags/development/elasticsearch_use_traversal_id_optimization.yml b/config/feature_flags/development/elasticsearch_use_traversal_id_optimization.yml
new file mode 100644
index 0000000000000..499da142527ef
--- /dev/null
+++ b/config/feature_flags/development/elasticsearch_use_traversal_id_optimization.yml
@@ -0,0 +1,8 @@
+---
+name: elasticsearch_use_traversal_id_optimization
+introduced_by_url: https://gitlab.com/gitlab-org/gitlab/-/merge_requests/108135
+rollout_issue_url: https://gitlab.com/gitlab-org/gitlab/-/issues/391286
+milestone: '15.9'
+type: development
+group: group::global search
+default_enabled: false
diff --git a/ee/lib/elastic/latest/application_class_proxy.rb b/ee/lib/elastic/latest/application_class_proxy.rb
index 30aec3007505f..8d5ee587321b2 100644
--- a/ee/lib/elastic/latest/application_class_proxy.rb
+++ b/ee/lib/elastic/latest/application_class_proxy.rb
@@ -406,7 +406,7 @@ def authorized_namespace_ids(current_user, options = {})
         authorized_ids.intersection(options[:group_ids].to_set).to_a
       end
 
-      def ancestry_filter(current_user, namespace_ancestry)
+      def ancestry_filter(current_user, namespace_ancestry, prefix:)
         return {} unless current_user
         return {} if namespace_ancestry.blank?
 
@@ -414,7 +414,7 @@ def ancestry_filter(current_user, namespace_ancestry)
           filters = namespace_ancestry.map do |namespace_ids|
             {
               prefix: {
-                namespace_ancestry_ids: {
+                "#{prefix}": {
                   _name: context.name(:descendants),
                   value: namespace_ids
                 }
diff --git a/ee/lib/elastic/latest/git_class_proxy.rb b/ee/lib/elastic/latest/git_class_proxy.rb
index 0516c3f8f8b5b..b696759f9074d 100644
--- a/ee/lib/elastic/latest/git_class_proxy.rb
+++ b/ee/lib/elastic/latest/git_class_proxy.rb
@@ -49,6 +49,29 @@ def blob_aggregations(query, options)
 
       private
 
+      def should_use_project_ids_filter?(options)
+        return true if options[:project_ids] == :any || options[:group_ids].blank? || Feature.disabled?(:elasticsearch_use_traversal_id_optimization)
+
+        !Elastic::DataMigrationService.migration_has_finished?(:backfill_traversal_ids_to_blobs_and_wiki_blobs)
+      end
+
+      def authorization_filter(query_hash, options)
+        return project_ids_filter(query_hash, options) if should_use_project_ids_filter?(options)
+
+        current_user = options[:current_user]
+        traversal_ids = Namespace.find(authorized_namespace_ids(current_user, options))
+                                      .map(&:elastic_namespace_ancestry)
+
+        return project_ids_filter(query_hash, options) if traversal_ids.blank?
+
+        context.name(:namespace) do
+          query_hash[:query][:bool][:filter] ||= []
+          query_hash[:query][:bool][:filter] << ancestry_filter(current_user, traversal_ids, prefix: :traversal_ids)
+        end
+
+        query_hash
+      end
+
       def options_filter_context(type, options)
         repository_ids = [options[:repository_id]].flatten
         languages = [options[:language]].flatten
@@ -276,7 +299,7 @@ def blob_query(query, type: 'blob', page: 1, per: 20, options: {})
         #
         # Note that `:current_user` might be `nil` for a anonymous user
         if options.key?(:current_user)
-          query_hash = context.name(:blob, :authorized) { project_ids_filter(query_hash, options) }
+          query_hash = context.name(:blob, :authorized) { authorization_filter(query_hash, options) }
         end
 
         # add the document type filter
diff --git a/ee/lib/elastic/latest/issue_class_proxy.rb b/ee/lib/elastic/latest/issue_class_proxy.rb
index d39f131ce5dca..c426ac46443a6 100644
--- a/ee/lib/elastic/latest/issue_class_proxy.rb
+++ b/ee/lib/elastic/latest/issue_class_proxy.rb
@@ -105,7 +105,7 @@ def authorization_filter(query_hash, options)
 
         context.name(:namespace) do
           query_hash[:query][:bool][:filter] ||= []
-          query_hash[:query][:bool][:filter] << ancestry_filter(current_user, namespace_ancestry)
+          query_hash[:query][:bool][:filter] << ancestry_filter(current_user, namespace_ancestry, prefix: :namespace_ancestry_ids)
         end
 
         query_hash
diff --git a/ee/lib/gitlab/elastic/group_search_results.rb b/ee/lib/gitlab/elastic/group_search_results.rb
index 9cfa8035b8793..514f31290f1f0 100644
--- a/ee/lib/gitlab/elastic/group_search_results.rb
+++ b/ee/lib/gitlab/elastic/group_search_results.rb
@@ -22,8 +22,9 @@ def initialize(current_user, query, limit_project_ids = nil, group:, public_and_
 
       override :scope_options
       def scope_options(scope)
+        # group_ids to options for traversal_ids filtering
         case scope
-        when :issues
+        when :issues, :blobs, :wiki_blobs
           super.merge(group_ids: [group.id])
         when :users
           super.merge(group_id: group.id)
diff --git a/ee/lib/gitlab/elastic/search_results.rb b/ee/lib/gitlab/elastic/search_results.rb
index 7a2f6558ad6d9..3a2b01034b1b1 100644
--- a/ee/lib/gitlab/elastic/search_results.rb
+++ b/ee/lib/gitlab/elastic/search_results.rb
@@ -281,6 +281,8 @@ def scope_options(scope)
           base_options.merge(features: [:issues, :merge_requests])
         when :users
           base_options.merge(admin: current_user&.admin?, routing_disabled: true) # rubocop:disable Cop/UserAdmin
+        when :blobs
+          base_options.merge(filters.slice(:language))
         else
           base_options
         end
@@ -335,7 +337,7 @@ def blobs(page: 1, per_page: DEFAULT_PER_PAGE, count_only: false, preload_method
             query,
             page: (page || 1).to_i,
             per: per_page,
-            options: base_options.merge(count_only: count_only).merge(filters.slice(:language)),
+            options: scope_options(:blobs).merge(count_only: count_only),
             preload_method: preload_method
           )
         end
@@ -349,7 +351,7 @@ def wiki_blobs(page: 1, per_page: DEFAULT_PER_PAGE, count_only: false)
             query,
             page: (page || 1).to_i,
             per: per_page,
-            options: base_options.merge(count_only: count_only)
+            options: scope_options(:wiki_blobs).merge(count_only: count_only)
           )
         end
       end
@@ -368,7 +370,7 @@ def commits(page: 1, per_page: DEFAULT_PER_PAGE, preload_method: nil, count_only
             query,
             page: (page || 1).to_i,
             per_page: per_page,
-            options: base_options.merge(count_only: count_only),
+            options: scope_options(:commits).merge(count_only: count_only),
             preload_method: preload_method
           )
         end
diff --git a/ee/spec/lib/elastic/latest/git_class_proxy_spec.rb b/ee/spec/lib/elastic/latest/git_class_proxy_spec.rb
index eec7dda31de79..bc3b2ea8c4323 100644
--- a/ee/spec/lib/elastic/latest/git_class_proxy_spec.rb
+++ b/ee/spec/lib/elastic/latest/git_class_proxy_spec.rb
@@ -2,7 +2,7 @@
 
 require 'spec_helper'
 
-RSpec.describe Elastic::Latest::GitClassProxy, :elastic do
+RSpec.describe Elastic::Latest::GitClassProxy, :elastic, feature_category: :global_search do
   let(:project) { create(:project, :public, :repository) }
   let(:included_class) { Elastic::Latest::RepositoryClassProxy }
 
@@ -86,4 +86,42 @@
     assert_named_queries('doc:is_a:blob',
                          'blob:match:search_terms')
   end
+
+  context 'when backfilling migration is incomplete' do
+    let_it_be(:user) { create(:user) }
+
+    before do
+      set_elasticsearch_migration_to(:backfill_traversal_ids_to_blobs_and_wiki_blobs, including: false)
+      stub_feature_flags(elasticsearch_use_traversal_id_optimization: false)
+    end
+
+    it 'does not use the traversal_id filter' do
+      expect(Namespace).not_to receive(:find)
+      subject.elastic_search_as_found_blob('*', options: { current_user: user, group_ids: [1] })
+    end
+  end
+
+  context 'when backfilling migration is complete' do
+    let_it_be(:user) { create(:user) }
+
+    before do
+      set_elasticsearch_migration_to(:backfill_traversal_ids_to_blobs_and_wiki_blobs, including: true)
+      stub_feature_flags(elasticsearch_use_traversal_id_optimization: true)
+    end
+
+    it 'does not use the traversal_id filter when project_ids are passed' do
+      expect(Namespace).not_to receive(:find)
+      subject.elastic_search_as_found_blob('*', options: { current_user: user, project_ids: [1, 2] })
+    end
+
+    it 'does not use the traversal_id filter when group_ids are not passed' do
+      expect(Namespace).not_to receive(:find)
+      subject.elastic_search_as_found_blob('*', options: { current_user: user })
+    end
+
+    it 'uses the traversal_id filter' do
+      expect(Namespace).to receive(:find).once.and_call_original
+      subject.elastic_search_as_found_blob('*', options: { current_user: user, group_ids: [1] })
+    end
+  end
 end
diff --git a/ee/spec/services/search/group_service_spec.rb b/ee/spec/services/search/group_service_spec.rb
index e554fada9c9ba..0bee2837be431 100644
--- a/ee/spec/services/search/group_service_spec.rb
+++ b/ee/spec/services/search/group_service_spec.rb
@@ -271,6 +271,17 @@
         end
 
         it_behaves_like 'search respects visibility'
+
+        context 'elasticsearch_use_group_level_optimization is enabled and migration is completed' do
+          before do
+            stub_feature_flags(elasticsearch_use_traversal_id_optimization: true)
+            set_elasticsearch_migration_to(:backfill_traversal_ids_to_blobs_and_wiki_blobs, including: true)
+          end
+
+          with_them do
+            it_behaves_like 'search respects visibility'
+          end
+        end
       end
     end
 
-- 
GitLab