diff --git a/doc/integration/elasticsearch.md b/doc/integration/elasticsearch.md index 1c80fc543af890aa14ddeba114c453c0116ff7b9..eee05eaef0213dbce3c9e89e392ae2e115e217fc 100644 --- a/doc/integration/elasticsearch.md +++ b/doc/integration/elasticsearch.md @@ -333,6 +333,10 @@ curl --request PUT localhost:9200/gitlab-production/_settings --data '{ Enable Elasticsearch search in **Admin > Settings > Integrations**. That's it. Enjoy it! +### Index limit + +Currently, GitLab indexes only up to 1 MB of content for each repository file and snippet, in order to avoid indexing timeouts. + ## GitLab Elasticsearch Rake Tasks There are several rake tasks available to you via the command line: diff --git a/ee/changelogs/unreleased/12111-snippet-index.yml b/ee/changelogs/unreleased/12111-snippet-index.yml new file mode 100644 index 0000000000000000000000000000000000000000..669177304d942b73dc986dfa997decf2f886e6f3 --- /dev/null +++ b/ee/changelogs/unreleased/12111-snippet-index.yml @@ -0,0 +1,5 @@ +--- +title: "Elasticsearch: index snippet content only up to 1 MB" +merge_request: 15215 +author: +type: changed diff --git a/ee/lib/elastic/latest/snippet_instance_proxy.rb b/ee/lib/elastic/latest/snippet_instance_proxy.rb index 3da2837a0a30d581e462c3e5ffb4a247369ea6b0..45b05e01d0a8910b45a6d641ede8354a5f5b332c 100644 --- a/ee/lib/elastic/latest/snippet_instance_proxy.rb +++ b/ee/lib/elastic/latest/snippet_instance_proxy.rb @@ -3,6 +3,8 @@ module Elastic module Latest class SnippetInstanceProxy < ApplicationInstanceProxy + MAX_INDEX_SIZE = 1.megabyte + def as_indexed_json(options = {}) # We don't use as_json(only: ...) 
because it calls all virtual and serialized attributes # https://gitlab.com/gitlab-org/gitlab-ee/issues/349 @@ -22,6 +24,10 @@ def as_indexed_json(options = {}) data[attr.to_s] = safely_read_attribute_for_elasticsearch(attr) end + if data['content'].bytesize > MAX_INDEX_SIZE + data['content'] = data['content'].mb_chars.limit(MAX_INDEX_SIZE).to_s # rubocop: disable CodeReuse/ActiveRecord + end + # ES6 is now single-type per index, so we implement our own typing data['type'] = es_type diff --git a/ee/spec/lib/elastic/latest/snippet_instance_proxy_spec.rb b/ee/spec/lib/elastic/latest/snippet_instance_proxy_spec.rb new file mode 100644 index 0000000000000000000000000000000000000000..863ce4fa4b24880e0b1d2c602d4a91d6b3ad0bbc --- /dev/null +++ b/ee/spec/lib/elastic/latest/snippet_instance_proxy_spec.rb @@ -0,0 +1,25 @@ +# frozen_string_literal: true + +require 'spec_helper' + +describe Elastic::Latest::SnippetInstanceProxy do + let(:snippet) { create(:personal_snippet) } + + subject { described_class.new(snippet) } + + context '#as_indexed_json' do + it 'serializes snippet as hash' do + expect(subject.as_indexed_json.with_indifferent_access).to include( + id: snippet.id, + title: snippet.title, + file_name: snippet.file_name, + content: snippet.content, + created_at: snippet.created_at, + updated_at: snippet.updated_at, + project_id: snippet.project_id, + author_id: snippet.author_id, + visibility_level: snippet.visibility_level + ) + end + end +end diff --git a/ee/spec/lib/gitlab/elastic/snippet_search_results_spec.rb b/ee/spec/lib/gitlab/elastic/snippet_search_results_spec.rb index 86c3bf5f0811fa26afa03469f2991286fcb63827..36d27c47baa85d5354420832eb91ed2f52990b11 100644 --- a/ee/spec/lib/gitlab/elastic/snippet_search_results_spec.rb +++ b/ee/spec/lib/gitlab/elastic/snippet_search_results_spec.rb @@ -61,4 +61,14 @@ expect(results.snippet_blobs_count).to eq(1) end end + + context 'when content is too long' do + let(:content) { "abc" + (" " * 
Elastic::Latest::SnippetInstanceProxy::MAX_INDEX_SIZE) + "xyz" } + let(:snippet) { create(:personal_snippet, :public, content: content) } + + it 'indexes up to a limit' do + expect(described_class.new(nil, 'abc').snippet_blobs_count).to eq(1) + expect(described_class.new(nil, 'xyz').snippet_blobs_count).to eq(0) + end + end end