From 73a3bc83a3caab7aa842d78ec5e6bb8e5b770302 Mon Sep 17 00:00:00 2001
From: Terri Chu <tchu@gitlab.com>
Date: Mon, 2 Aug 2021 22:47:01 +0000
Subject: [PATCH] Support displaying indexed filenames for large files

---
 GITLAB_ELASTICSEARCH_INDEXER_VERSION                  |  2 +-
 app/views/search/results/_blob_data.html.haml         | 11 +++++++++--
 doc/administration/instance_limits.md                 |  4 ++--
 .../_elasticsearch_form.html.haml                     |  2 +-
 ee/spec/lib/gitlab/elastic/indexer_spec.rb            | 11 ++++++++---
 locale/gitlab.pot                                     |  5 ++++-
 6 files changed, 25 insertions(+), 10 deletions(-)

diff --git a/GITLAB_ELASTICSEARCH_INDEXER_VERSION b/GITLAB_ELASTICSEARCH_INDEXER_VERSION
index d8b698973a491..fb2c0766b7cc2 100644
--- a/GITLAB_ELASTICSEARCH_INDEXER_VERSION
+++ b/GITLAB_ELASTICSEARCH_INDEXER_VERSION
@@ -1 +1 @@
-2.12.0
+2.13.0
diff --git a/app/views/search/results/_blob_data.html.haml b/app/views/search/results/_blob_data.html.haml
index fb2825ad15ef1..b2f2b9396c714 100644
--- a/app/views/search/results/_blob_data.html.haml
+++ b/app/views/search/results/_blob_data.html.haml
@@ -7,5 +7,12 @@
           = search_blob_title(project, path)
       = copy_file_path_button(path)
     - if blob.data
-      .file-content.code.term{ data: { qa_selector: 'file_text_content' } }
-        = render 'shared/file_highlight', blob: blob, first_line_number: blob.startline, blob_link: blob_link, highlight_line: blob.highlight_line
+      - if blob.data.size > 0
+        .file-content.code.term{ data: { qa_selector: 'file_text_content' } }
+          = render 'shared/file_highlight', blob: blob, first_line_number: blob.startline, blob_link: blob_link, highlight_line: blob.highlight_line
+      - else
+        .file-content.code
+          .nothing-here-block
+            .gl-text-gray-600.gl-font-sm
+              - max_file_size_indexed = Gitlab::CurrentSettings.elasticsearch_indexed_file_size_limit_kb.kilobytes
+              = _('The file could not be displayed because it is empty or larger than the maximum file size indexed (%{size}).') % { size: number_to_human_size(max_file_size_indexed) }
diff --git a/doc/administration/instance_limits.md b/doc/administration/instance_limits.md
index 383562362f4d1..bd1be0981ce20 100644
--- a/doc/administration/instance_limits.md
+++ b/doc/administration/instance_limits.md
@@ -626,8 +626,8 @@ Reports that go over the 20 MB limit won't be loaded. Affected reports:
 > [Introduced](https://gitlab.com/gitlab-org/gitlab/-/issues/8638) in GitLab 13.3.
 
 You can set a limit on the content of repository files that are indexed in
-Elasticsearch. Any files larger than this limit is neither indexed
-nor searchable.
+Elasticsearch. For files larger than this limit, only the file name is indexed.
+The file content is neither indexed nor searchable.
 
 Setting a limit helps reduce the memory usage of the indexing processes and
 the overall index size. This value defaults to `1024 KiB` (1 MiB) as any
diff --git a/ee/app/views/admin/application_settings/_elasticsearch_form.html.haml b/ee/app/views/admin/application_settings/_elasticsearch_form.html.haml
index afa47fcdfbfed..56fb62530e48d 100644
--- a/ee/app/views/admin/application_settings/_elasticsearch_form.html.haml
+++ b/ee/app/views/admin/application_settings/_elasticsearch_form.html.haml
@@ -109,7 +109,7 @@
               = f.label :elasticsearch_indexed_file_size_limit_kb, _('Maximum file size indexed (KiB)'), class: 'label-bold'
               = f.number_field :elasticsearch_indexed_file_size_limit_kb, value: @application_setting.elasticsearch_indexed_file_size_limit_kb, class: 'form-control gl-form-input'
               .form-text.gl-text-gray-600.gl-mt-0
-                = _('Any files larger than this limit will not be indexed, and thus will not be searchable.')
+                = _('Any files larger than this limit only index the file name. The file content is neither indexed nor searchable.')
 
             .form-group
               = f.label :elasticsearch_indexed_field_length_limit, _('Maximum field length'), class: 'label-bold'
diff --git a/ee/spec/lib/gitlab/elastic/indexer_spec.rb b/ee/spec/lib/gitlab/elastic/indexer_spec.rb
index 425886b44906d..376f4e409a745 100644
--- a/ee/spec/lib/gitlab/elastic/indexer_spec.rb
+++ b/ee/spec/lib/gitlab/elastic/indexer_spec.rb
@@ -370,10 +370,15 @@ def indexed_wiki_paths_for(term)
       index_repository(project)
     end
 
-    it 'does not index that file' do
+    it 'indexes the file with empty content' do
       files = indexed_file_paths_for('file')
-      expect(files).to include('small_file.txt')
-      expect(files).not_to include('large_file.txt')
+      expect(files).to include('small_file.txt', 'large_file.txt')
+
+      blobs = Repository.elastic_search('large_file', type: 'blob')[:blobs][:results].response
+      large_file_blob = blobs.find do |blob|
+        'large_file.txt' == blob['_source']['blob']['path']
+      end
+      expect(large_file_blob['_source']['blob']['content']).to eq('')
     end
   end
 
diff --git a/locale/gitlab.pot b/locale/gitlab.pot
index 0600deae8beef..4a1a09581e2f4 100644
--- a/locale/gitlab.pot
+++ b/locale/gitlab.pot
@@ -3903,7 +3903,7 @@ msgstr ""
 msgid "Any encrypted tokens"
 msgstr ""
 
-msgid "Any files larger than this limit will not be indexed, and thus will not be searchable."
+msgid "Any files larger than this limit only index the file name. The file content is neither indexed nor searchable."
 msgstr ""
 
 msgid "Any label"
@@ -32831,6 +32831,9 @@ msgstr ""
 msgid "The file containing the export is not available yet; it may still be transferring. Please try again later."
 msgstr ""
 
+msgid "The file could not be displayed because it is empty or larger than the maximum file size indexed (%{size})."
+msgstr ""
+
 msgid "The file has been successfully created."
 msgstr ""
 
-- 
GitLab