# Patch summary (this preamble sits before the first `diff --git` header).
#
# Purpose: give the issue `title` field its own analyzer and reindex issues so it takes effect.
#
# Files touched:
#   1. ee/elastic/docs/20240123181031_reindex_issue_to_update_analyzer_for_title.yml (new)
#      Migration metadata: name, version, description, group, milestone '16.9',
#      introducing MR URL, obsolete: false.
#   2. ee/elastic/migrate/20240123181031_reindex_issue_to_update_analyzer_for_title.rb (new)
#      ReindexIssueToUpdateAnalyzerForTitle < Elastic::Migration.
#      #migrate creates one Elastic::ReindexingTask with targets %w[Issue] and
#      options { skip_pending_migrations_check: true }.
#      #completed? returns true unconditionally.
#   3. ee/lib/elastic/latest/config.rb
#      Adds `title_analyzer`: tokenizer 'standard', filters (in order)
#      word_delimiter_graph_filter, asciifolding, lowercase, stemmer,
#      flatten_graph, remove_duplicates.
#   4. ee/lib/elastic/latest/issue_config.rb
#      `title` mapping gains `analyzer: :title_analyzer`.
#   5. ee/spec/elastic/migrate/..._reindex_issue_to_update_analyzer_for_title_spec.rb (new)
#      Asserts the migration is neither batched nor retry_on_failure, that #migrate
#      adds one ReindexingTask with the expected targets/options, and that
#      #completed? is true.
#   6. ee/spec/lib/gitlab/elastic/search_results_spec.rb
#      Replaces the inline "plural words" example with
#      it_behaves_like 'can search by title for miscellaneous cases', 'issues'.
#   7. ee/spec/support/shared_examples/services/search_service_shared_examples.rb
#      Adds the shared examples: two records are created per example, retitled,
#      indexed, then searched for by 'button', 'koln', 'dot', 'underscore', 'Camelcase'.
#
# NOTE(review): in this copy of the patch the line breaks appear to have been
#   collapsed into spaces; it would need to be re-expanded before it can be applied.
# NOTE(review): `word_delimiter_graph_filter` is not defined in any hunk shown here;
#   presumably it already exists in config.rb's filter section -- confirm.
# NOTE(review): #completed? returns true without checking the ReindexingTask; the spec
#   pins this ("always returns true"), so it looks deliberate -- confirm that reindex
#   progress is tracked by the task itself.
# NOTE(review): the shared examples accept `type`, but every example asserts
#   `results.issues_count`, and `create_records!` has a `when` branch only for 'issues'.
#   For any other type the `case` yields nil, so `@records[0]` would raise. Worth
#   generalizing or documenting before reusing the examples for another scope.
# NOTE(review): the "umlauts" example's second title ('kǒln') uses a caron, not an
#   umlaut; the example name is slightly narrower than what it exercises.
diff --git a/ee/elastic/docs/20240123181031_reindex_issue_to_update_analyzer_for_title.yml b/ee/elastic/docs/20240123181031_reindex_issue_to_update_analyzer_for_title.yml new file mode 100644 index 0000000000000000000000000000000000000000..4da20d22b722a06d35421fa7b8df8e8ac14ed8bf --- /dev/null +++ b/ee/elastic/docs/20240123181031_reindex_issue_to_update_analyzer_for_title.yml @@ -0,0 +1,10 @@ +--- +name: ReindexIssueToUpdateAnalyzerForTitle +version: '20240123181031' +description: This migration reindexes the issues index to start using new analyzer for title +group: group::global search +milestone: '16.9' +introduced_by_url: https://gitlab.com/gitlab-org/gitlab/-/merge_requests/142580 +obsolete: false +marked_obsolete_by_url: +marked_obsolete_in_milestone: diff --git a/ee/elastic/migrate/20240123181031_reindex_issue_to_update_analyzer_for_title.rb b/ee/elastic/migrate/20240123181031_reindex_issue_to_update_analyzer_for_title.rb new file mode 100644 index 0000000000000000000000000000000000000000..374596c6cb8e9f89733387837a7a028c15b52d18 --- /dev/null +++ b/ee/elastic/migrate/20240123181031_reindex_issue_to_update_analyzer_for_title.rb @@ -0,0 +1,11 @@ +# frozen_string_literal: true + +class ReindexIssueToUpdateAnalyzerForTitle < Elastic::Migration + def migrate + Elastic::ReindexingTask.create!(targets: %w[Issue], options: { skip_pending_migrations_check: true }) + end + + def completed? 
+ true + end +end diff --git a/ee/lib/elastic/latest/config.rb b/ee/lib/elastic/latest/config.rb index fb953abe48c41e7dfa165cbb7b5cc3b3cd1635f1..1a9e5420672ba97df23a2e3a7f5c190155e23b3c 100644 --- a/ee/lib/elastic/latest/config.rb +++ b/ee/lib/elastic/latest/config.rb @@ -25,6 +25,11 @@ module Config codec: 'best_compression', analysis: { analyzer: { + title_analyzer: { + tokenizer: 'standard', + filter: %w[word_delimiter_graph_filter asciifolding lowercase stemmer flatten_graph remove_duplicates] + }, + default: { tokenizer: 'standard', filter: %w[lowercase stemmer] diff --git a/ee/lib/elastic/latest/issue_config.rb b/ee/lib/elastic/latest/issue_config.rb index f34ee61894c0c02b9d49cc5629210f259337ca07..f82e3e2f07546a1d0491ea79f5b02e8c05794ce9 100644 --- a/ee/lib/elastic/latest/issue_config.rb +++ b/ee/lib/elastic/latest/issue_config.rb @@ -22,7 +22,7 @@ module IssueConfig indexes :id, type: :integer indexes :iid, type: :integer - indexes :title, type: :text, index_options: 'positions' + indexes :title, type: :text, index_options: 'positions', analyzer: :title_analyzer indexes :description, type: :text, index_options: 'positions', analyzer: :code_analyzer indexes :created_at, type: :date indexes :updated_at, type: :date diff --git a/ee/spec/elastic/migrate/20240123181031_reindex_issue_to_update_analyzer_for_title_spec.rb b/ee/spec/elastic/migrate/20240123181031_reindex_issue_to_update_analyzer_for_title_spec.rb new file mode 100644 index 0000000000000000000000000000000000000000..6ab10b92e9f5954d9f408dbd6e7bb694be21b237 --- /dev/null +++ b/ee/spec/elastic/migrate/20240123181031_reindex_issue_to_update_analyzer_for_title_spec.rb @@ -0,0 +1,30 @@ +# frozen_string_literal: true + +require 'spec_helper' +require_relative 'migration_shared_examples' +require File.expand_path('ee/elastic/migrate/20240123181031_reindex_issue_to_update_analyzer_for_title.rb') + +RSpec.describe ReindexIssueToUpdateAnalyzerForTitle, feature_category: :global_search do + let(:version) { 
20240123181031 } + let(:migration) { described_class.new(version) } + + it 'does not have migration options set', :aggregate_failures do + expect(migration).not_to be_batched + expect(migration).not_to be_retry_on_failure + end + + describe '#migrate' do + it 'creates reindexing task with correct target and options' do + expect { migration.migrate }.to change { Elastic::ReindexingTask.count }.by(1) + task = Elastic::ReindexingTask.last + expect(task.targets).to eq(%w[Issue]) + expect(task.options).to eq({ 'skip_pending_migrations_check' => true }) + end + end + + describe '#completed?' do + it 'always returns true' do + expect(migration.completed?).to eq(true) + end + end +end diff --git a/ee/spec/lib/gitlab/elastic/search_results_spec.rb b/ee/spec/lib/gitlab/elastic/search_results_spec.rb index 0faa50f57542e0fb24d3cfeb2992d0e4aeff4142..d5a8170489bd86ce8d84d95e47949da3610bbff2 100644 --- a/ee/spec/lib/gitlab/elastic/search_results_spec.rb +++ b/ee/spec/lib/gitlab/elastic/search_results_spec.rb @@ -364,17 +364,7 @@ expect(results.issues_count).to eq 0 end - it 'handles plural words through algorithmic stemming', :aggregate_failures do - issue1 = create(:issue, project: project_1, title: 'remove :title attribute from submit buttons to prevent un-styled tooltips') - issue2 = create(:issue, project: project_1, title: 'smarter submit behavior for buttons groups') - - ensure_elasticsearch_index! 
- - results = described_class.new(user, 'button', limit_project_ids) - - expect(results.objects('issues')).to contain_exactly(issue1, issue2) - expect(results.issues_count).to eq 2 - end + it_behaves_like 'can search by title for miscellaneous cases', 'issues' it 'executes count only queries' do results = described_class.new(user, query, limit_project_ids) diff --git a/ee/spec/support/shared_examples/services/search_service_shared_examples.rb b/ee/spec/support/shared_examples/services/search_service_shared_examples.rb index 2159e0cbc160c841cdbae33ee00795d677d9b04a..ad8f26b65be821a6a560c06f0a5228ac504671e3 100644 --- a/ee/spec/support/shared_examples/services/search_service_shared_examples.rb +++ b/ee/spec/support/shared_examples/services/search_service_shared_examples.rb @@ -118,3 +118,66 @@ end end end + +RSpec.shared_examples 'can search by title for miscellaneous cases' do |type| + let_it_be(:searched_project) { create(:project, :public, :repository, :wiki_repo) } + let(:records_count) { 2 } + + def create_records!(type) + case type + when 'issues' + create_list(:issue, records_count, project: searched_project) + end + end + + # rubocop:disable RSpec/InstanceVariable -- Want to reuse the @records + before do + @records = create_records!(type) + end + + it 'handles plural words through algorithmic stemming', :aggregate_failures do + @records[0].update!(title: 'remove :title attribute from submit buttons to prevent un-styled tooltips') + @records[1].update!(title: 'smarter submit behavior for buttons groups') + ensure_elasticsearch_index! + results = described_class.new(user, 'button', [searched_project.id]) + expect(results.objects(type)).to match_array(@records) + expect(results.issues_count).to eq records_count + end + + it 'handles if title has umlauts', :aggregate_failures do + @records[0].update!(title: 'köln') + @records[1].update!(title: 'kǒln') + ensure_elasticsearch_index! 
+ results = described_class.new(user, 'koln', [searched_project.id]) + expect(results.objects(type)).to match_array(@records) + expect(results.issues_count).to eq records_count + end + + it 'handles if title has dots', :aggregate_failures do + @records[0].update!(title: 'with.dot.title') + @records[1].update!(title: 'there is.dot') + ensure_elasticsearch_index! + results = described_class.new(user, 'dot', [searched_project.id]) + expect(results.objects(type)).to match_array(@records) + expect(results.issues_count).to eq records_count + end + + it 'handles if title has underscore', :aggregate_failures do + @records[0].update!(title: 'with_underscore_text') + @records[1].update!(title: 'some_underscore') + ensure_elasticsearch_index! + results = described_class.new(user, 'underscore', [searched_project.id]) + expect(results.objects(type)).to match_array(@records) + expect(results.issues_count).to eq records_count + end + + it 'handles if title has camelcase', :aggregate_failures do + @records[0].update!(title: 'withCamelcaseTitle') + @records[1].update!(title: 'CamelcaseText') + ensure_elasticsearch_index! + results = described_class.new(user, 'Camelcase', [searched_project.id]) + expect(results.objects(type)).to match_array(@records) + expect(results.issues_count).to eq records_count + end + # rubocop:enable RSpec/InstanceVariable +end