diff --git a/doc/development/testing_guide/best_practices.md b/doc/development/testing_guide/best_practices.md
index dbee7acac690e1dad1bf7906dd2dc44e698750d8..5648c39e370fd0dea84748ab626ae6c0961f6522 100644
--- a/doc/development/testing_guide/best_practices.md
+++ b/doc/development/testing_guide/best_practices.md
@@ -1298,7 +1298,7 @@ creates and deletes indices before and after all examples.
 The `:elastic_delete_by_query` trait was added to reduce runtime for pipelines by creating and deleting indices
 at the start and end of each context only. The
 [Elasticsearch delete by query API](https://www.elastic.co/guide/en/elasticsearch/reference/current/docs-delete-by-query.html)
-is used to delete data in all indices between examples to ensure a clean index.
+is used to delete data in all indices (except the migrations index) between examples to ensure a clean index.
 
 The `:elastic_clean` trait creates and deletes indices between examples to ensure a clean index. This way, tests are
 not polluted with non-essential data. If using the `:elastic` or `:elastic_delete_by_query` trait
@@ -1336,6 +1336,12 @@ It uses the [Elasticsearch Refresh API](https://www.elastic.co/guide/en/elastics
 to make sure all operations performed on an index since the last refresh are available for search. This method is
 typically called after loading data into PostgreSQL to ensure the data is indexed and searchable.
 
+You can use the `SEARCH_SPEC_BENCHMARK` environment variable to benchmark test setup steps:
+
+```console
+SEARCH_SPEC_BENCHMARK=1 bundle exec rspec ee/spec/lib/elastic/latest/merge_request_class_proxy_spec.rb
+```
+
 #### Test Snowplow events
 
 WARNING:
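
For context on how the traits documented above are used, here is a minimal sketch of a search spec that relies on `:elastic_delete_by_query` and `ensure_elasticsearch_index!`. The described subject, factory data, and the `elastic_search` call are illustrative assumptions, not part of this change:

```ruby
# Hypothetical spec for illustration only; real specs live under ee/spec and
# pull in the EE Elasticsearch test helpers via the trait.
RSpec.describe 'issue search', :elastic_delete_by_query, feature_category: :global_search do
  let_it_be(:project) { create(:project, :public) }

  it 'finds a freshly indexed issue' do
    issue = create(:issue, project: project, title: 'elasticsearch needle')

    # Flush pending writes and refresh indices so the document is searchable.
    ensure_elasticsearch_index!

    results = Issue.elastic_search('needle', options: { project_ids: [project.id] })
    expect(results.records).to contain_exactly(issue)
  end
end
```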
diff --git a/ee/spec/lib/ee/gitlab/elastic/helper_spec.rb b/ee/spec/lib/ee/gitlab/elastic/helper_spec.rb
index 5ca5fe6eb40869c52e52fe78ad9679e13b437fd7..0740ffeb7ccb4e8b7d5b2901b0a4855e98923ef8 100644
--- a/ee/spec/lib/ee/gitlab/elastic/helper_spec.rb
+++ b/ee/spec/lib/ee/gitlab/elastic/helper_spec.rb
@@ -808,27 +808,65 @@
     end
   end
 
-  describe '#target_index_names', :elastic_delete_by_query do
+  describe '#target_index_names' do
     let(:target_index) { nil }
 
     subject(:target_index_names) { helper.target_index_names(target: target_index) }
 
-    context 'when a nil target is provided' do
-      let(:index_regex) { /\Agitlab-test-[0-9-].*\z/ }
+    context 'when alias exists' do
+      before do
+        allow(helper).to receive(:alias_exists?).and_return(true)
+        allow(helper.client.indices).to receive(:get_alias).and_return(aliases)
+      end
+
+      context 'when a nil target is provided' do
+        let(:aliases) do
+          {
+            "gitlab-test-20231129-200242-0002" => { "aliases" => { "gitlab-test" => { "is_write_index" => true } } },
+            "gitlab-test-20231129-200242" => { "aliases" => { "gitlab-test" => { "is_write_index" => false } } }
+          }
+        end
 
-      it 'uses the default target from target_name' do
-        expect(target_index_names.keys).to contain_exactly(index_regex, index_regex)
-        expect(target_index_names.values).to contain_exactly(true, false)
+        let(:index_regex) { /\Agitlab-test-[\d-]+\z/ }
+
+        it 'uses the default target from target_name' do
+          expect(target_index_names.keys).to contain_exactly(index_regex, index_regex)
+          expect(target_index_names.values).to contain_exactly(true, false)
+        end
       end
-    end
 
-    context 'when a non-nil target is provided' do
-      let(:target_index) { Project.index_name }
-      let(:index_regex) { /\Agitlab-test-projects-[0-9-].*\z/ }
+      context 'when a target is provided' do
+        let(:aliases) do
+          {
+            "gitlab-test-projects-20231129-0002" =>
+              { "aliases" => { "gitlab-test-projects" => { "is_write_index" => true } } },
+            "gitlab-test-projects-20231129" =>
+              { "aliases" => { "gitlab-test-projects" => { "is_write_index" => false } } }
+          }
+        end
+
+        let(:target_index) { "gitlab-test-projects" }
+        let(:index_regex) { /\Agitlab-test-projects-[\d-]+\z/ }
+
+        it 'uses the target index' do
+          expect(target_index_names.keys).to contain_exactly(index_regex, index_regex)
+          expect(target_index_names.values).to contain_exactly(true, false)
+        end
+      end
 
-      it 'uses the target index' do
-        expect(target_index_names.keys).to contain_exactly(index_regex, index_regex)
-        expect(target_index_names.values).to contain_exactly(true, false)
+      context 'when write index is not set' do
+        let(:aliases) do
+          {
+            "gitlab-test-20231129-200242" => { "aliases" => { "gitlab-test" => {} } }
+          }
+        end
+
+        let(:index_regex) { /\Agitlab-test-[\d-]+\z/ }
+
+        it 'defaults the write index value to true' do
+          expect(target_index_names.keys).to contain_exactly(index_regex)
+          expect(target_index_names.values).to contain_exactly(true)
+        end
       end
     end
   end
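
The stubbed `get_alias` payloads above pin down a simple contract: reduce the alias response to a hash of index name to `is_write_index`, defaulting to `true` when the flag is absent. A sketch of that reduction, for illustration only (not the helper's actual implementation):

```ruby
# Reduce an Elasticsearch get_alias response to the { index_name => write_flag }
# shape the examples assert on. A missing "is_write_index" defaults to true,
# matching the 'when write index is not set' context.
def target_index_names_from(aliases)
  aliases.transform_values do |config|
    alias_config = config.fetch('aliases', {}).values.first || {}
    alias_config.fetch('is_write_index', true)
  end
end

target_index_names_from(
  "gitlab-test-20231129-200242" => { "aliases" => { "gitlab-test" => {} } }
)
# => { "gitlab-test-20231129-200242" => true }
```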
diff --git a/ee/spec/lib/gitlab/elastic/bulk_indexer_spec.rb b/ee/spec/lib/gitlab/elastic/bulk_indexer_spec.rb
index 4ef2b64e67caf0908f41777fafcd858d427859b1..7603855c42b91e98614992e26465b29269267af3 100644
--- a/ee/spec/lib/gitlab/elastic/bulk_indexer_spec.rb
+++ b/ee/spec/lib/gitlab/elastic/bulk_indexer_spec.rb
@@ -2,30 +2,29 @@
 
 require 'spec_helper'
 
-RSpec.describe Gitlab::Elastic::BulkIndexer, :elastic, :clean_gitlab_redis_shared_state do
+RSpec.describe Gitlab::Elastic::BulkIndexer, :elastic, :clean_gitlab_redis_shared_state,
+  feature_category: :global_search do
   let_it_be(:issue) { create(:issue) }
   let_it_be(:other_issue) { create(:issue, project: issue.project) }
 
   let(:project) { issue.project }
   let(:logger) { ::Gitlab::Elasticsearch::Logger.build }
-
-  subject(:indexer) { described_class.new(logger: logger) }
-
   let(:es_client) { indexer.client }
-
   let(:issue_as_ref) { ref(issue) }
   let(:issue_as_json_with_times) { issue.__elasticsearch__.as_indexed_json }
   let(:issue_as_json) { issue_as_json_with_times.except('created_at', 'updated_at') }
   let(:other_issue_as_ref) { ref(other_issue) }
 
-  # Because there are two indices, one rolled over and one active,
-  # there is an additional op for each index.
-  #
   # Whatever the json payload bytesize is, it will ultimately be multiplied
   # by the total number of indices. We add an additional 0.5 to the overflow
   # factor to simulate the bulk_limit being exceeded in tests.
-  let(:bulk_limit_overflow_factor) { 2.5 }
+  let(:bulk_limit_overflow_factor) do
+    helper = Gitlab::Elastic::Helper.default
+    helper.target_index_names(target: nil).count + 0.5
+  end
+
+  subject(:indexer) { described_class.new(logger: logger) }
 
   describe '#process' do
     it 'returns bytesize for the indexing operation and data' do
@@ -84,19 +82,104 @@
   end
 
   describe '#flush' do
-    it 'completes a bulk' do
-      indexer.process(issue_as_ref)
+    context 'when curation has not occurred' do
+      it 'completes a bulk' do
+        indexer.process(issue_as_ref)
 
-      # The es_client will receive three items in bulk request for a single ref:
-      # 1) The bulk index header, ie: { "index" => { "_index": "gitlab-issues" } }
-      # 2) The payload of the actual document to write index
-      # 3) The delete request for document in rolled over index
-      expect(es_client)
-        .to receive(:bulk)
-        .with(body: [kind_of(String), kind_of(String), kind_of(String)])
-        .and_return({})
+        # The es_client will receive two items in the bulk request for a single ref:
+        # 1) The bulk index header, ie: { "index" => { "_index": "gitlab-issues" } }
+        # 2) The payload of the actual document for the write index
+        expect(es_client)
+          .to receive(:bulk)
+          .with(body: [kind_of(String), kind_of(String)])
+          .and_return({})
+
+        expect(indexer.flush).to be_empty
+      end
+
+      it 'fails all documents on exception' do
+        expect(es_client).to receive(:bulk) { raise 'An exception' }
+
+        indexer.process(issue_as_ref)
+        indexer.process(other_issue_as_ref)
+
+        expect(indexer.flush).to contain_exactly(issue_as_ref, other_issue_as_ref)
+        expect(indexer.failures).to contain_exactly(issue_as_ref, other_issue_as_ref)
+      end
+
+      it 'fails a document correctly on exception after adding an item that exceeded the bulk limit' do
+        bulk_limit_bytes = (issue_as_json_with_times.to_json.bytesize * bulk_limit_overflow_factor).to_i
+        set_bulk_limit(indexer, bulk_limit_bytes)
+        indexer.process(issue_as_ref)
+        allow(es_client).to receive(:bulk).and_return({})
+
+        indexer.process(issue_as_ref)
 
-      expect(indexer.flush).to be_empty
+        expect(es_client).to have_received(:bulk) do |args|
+          body_bytesize = args[:body].sum(&:bytesize)
+          expect(body_bytesize).to be <= bulk_limit_bytes
+        end
+
+        expect(es_client).to receive(:bulk) { raise 'An exception' }
+
+        expect(indexer.flush).to contain_exactly(issue_as_ref)
+        expect(indexer.failures).to contain_exactly(issue_as_ref)
+      end
+    end
+
+    context 'when curation has occurred' do
+      before_all do
+        curator = ::Gitlab::Search::IndexCurator.new(ignore_patterns: [/migrations/], force: true, dry_run: false)
+        curator.curate!
+      end
+
+      it 'completes a bulk' do
+        indexer.process(issue_as_ref)
+
+        # The es_client will receive three items in the bulk request for a single ref:
+        # 1) The bulk index header, ie: { "index" => { "_index": "gitlab-issues" } }
+        # 2) The payload of the actual document for the write index
+        # 3) The delete request for the document in the rolled over index
+        expect(es_client)
+          .to receive(:bulk)
+          .with(body: [kind_of(String), kind_of(String), kind_of(String)])
+          .and_return({})
+
+        expect(indexer.flush).to be_empty
+      end
+
+      it 'fails all documents on exception' do
+        expect(es_client).to receive(:bulk) { raise 'An exception' }
+
+        indexer.process(issue_as_ref)
+        indexer.process(other_issue_as_ref)
+
+        # Since there are two indices, one rolled over and one active,
+        # we can expect to have double the instances of failed refs
+        expect(indexer.flush).to contain_exactly(issue_as_ref, issue_as_ref, other_issue_as_ref, other_issue_as_ref)
+        expect(indexer.failures).to contain_exactly(issue_as_ref, issue_as_ref, other_issue_as_ref, other_issue_as_ref)
+      end
+
+      it 'fails a document correctly on exception after adding an item that exceeded the bulk limit' do
+        bulk_limit_bytes = (issue_as_json_with_times.to_json.bytesize * bulk_limit_overflow_factor).to_i
+        set_bulk_limit(indexer, bulk_limit_bytes)
+        indexer.process(issue_as_ref)
+        allow(es_client).to receive(:bulk).and_return({})
+
+        indexer.process(issue_as_ref)
+
+        expect(es_client).to have_received(:bulk) do |args|
+          body_bytesize = args[:body].sum(&:bytesize)
+          expect(body_bytesize).to be <= bulk_limit_bytes
+        end
+
+        expect(es_client).to receive(:bulk) { raise 'An exception' }
+
+        # Since there are two indices, one rolled over and one active,
+        # we can expect to have double the instances of failed refs
+        expect(indexer.flush).to contain_exactly(issue_as_ref, issue_as_ref)
+        expect(indexer.failures).to contain_exactly(issue_as_ref, issue_as_ref)
+      end
+    end
 
     it 'fails documents that elasticsearch refuses to accept' do
@@ -116,39 +199,6 @@
       expect(search_one(Issue)).to have_attributes(issue_as_json)
     end
 
-    it 'fails all documents on exception' do
-      expect(es_client).to receive(:bulk) { raise 'An exception' }
-
-      indexer.process(issue_as_ref)
-      indexer.process(other_issue_as_ref)
-
-      # Since there are two indices, one rolled over and one active
-      # we can expect to have double the instances of failed refs
-      expect(indexer.flush).to contain_exactly(issue_as_ref, issue_as_ref, other_issue_as_ref, other_issue_as_ref)
-      expect(indexer.failures).to contain_exactly(issue_as_ref, issue_as_ref, other_issue_as_ref, other_issue_as_ref)
-    end
-
-    it 'fails a document correctly on exception after adding an item that exceeded the bulk limit' do
-      bulk_limit_bytes = (issue_as_json_with_times.to_json.bytesize * bulk_limit_overflow_factor).to_i
-      set_bulk_limit(indexer, bulk_limit_bytes)
-      indexer.process(issue_as_ref)
-      allow(es_client).to receive(:bulk).and_return({})
-
-      indexer.process(issue_as_ref)
-
-      expect(es_client).to have_received(:bulk) do |args|
-        body_bytesize = args[:body].sum(&:bytesize)
-        expect(body_bytesize).to be <= bulk_limit_bytes
-      end
-
-      expect(es_client).to receive(:bulk) { raise 'An exception' }
-
-      # Since there are two indices, one rolled over and one active
-      # we can expect to have double the instances of failed refs
-      expect(indexer.flush).to contain_exactly(issue_as_ref, issue_as_ref)
-      expect(indexer.failures).to contain_exactly(issue_as_ref, issue_as_ref)
-    end
-
     context 'indexing an issue' do
       it 'adds the issue to the index' do
         indexer.process(issue_as_ref)
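
The bulk-limit examples above are easier to follow with concrete numbers. A worked sketch of the `bulk_limit_overflow_factor` arithmetic, assuming a 1,000-byte payload and two target indices (both values are made up for illustration):

```ruby
payload_bytesize = 1_000            # assumed bytesize of one indexed document
index_count = 2                     # e.g. target_index_names(target: nil).count
overflow_factor = index_count + 0.5 # mirrors the spec's bulk_limit_overflow_factor

bulk_limit_bytes = (payload_bytesize * overflow_factor).to_i
puts bulk_limit_bytes # => 2500

# Processing one ref queues roughly index_count * payload_bytesize = 2_000
# bytes, which fits under the 2_500-byte limit. Processing the same ref again
# would push past the limit, triggering the early flush the examples rely on.
```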
diff --git a/ee/spec/support/elastic.rb b/ee/spec/support/elastic.rb
index dd3fca5527088aeabcfdca832897f5ff5c0a96f7..fd262fa1a7e35e648976160a65e8c1b2f9414b6d 100644
--- a/ee/spec/support/elastic.rb
+++ b/ee/spec/support/elastic.rb
@@ -22,12 +22,21 @@ def curator
 
   def setup(multi_index: true)
     clear_tracking!
-    delete_indices!
-    helper.create_empty_index(options: { settings: { number_of_replicas: 0 } })
-    helper.create_migrations_index
-    ::Elastic::DataMigrationService.mark_all_as_completed!
-    helper.create_standalone_indices
-    curator.curate! if multi_index
+    benchmark(:delete_indices!) { delete_indices! }
+
+    benchmark(:create_migrations_index) { helper.create_migrations_index }
+    benchmark(:mark_all_as_completed!) { Elastic::DataMigrationService.mark_all_as_completed! }
+
+    name_suffix = Time.now.utc.strftime('%S.%L')
+    benchmark(:create_empty_index) do
+      helper.create_empty_index(options: { settings: { number_of_replicas: 0 }, name_suffix: name_suffix })
+    end
+    benchmark(:create_standalone_indices) do
+      helper.create_standalone_indices(options: { settings: { number_of_replicas: 0 }, name_suffix: name_suffix })
+    end
+
+    benchmark(:curate) { curator.curate! } if multi_index
+
     refresh_elasticsearch_index!
   end
@@ -61,6 +70,19 @@ def delete_all_data_from_index!
       }
     )
   end
+
+  def benchmark(name)
+    return yield unless ENV['SEARCH_SPEC_BENCHMARK']
+
+    result = nil
+    time = Benchmark.realtime do
+      result = yield
+    end
+
+    puts({ name: name, elapsed_time: time.round(2) }.to_json)
+
+    result
+  end
 end
@@ -74,7 +96,7 @@ def delete_all_data_from_index!
   # wherever possible.
   config.before(:all, :elastic) do
     helper = Elastic::TestHelpers.new
-    helper.setup(multi_index: true)
+    helper.setup(multi_index: false)
   end
 
   config.after(:all, :elastic) do
@@ -99,7 +121,7 @@ def delete_all_data_from_index!
   end
 
   config.before(:context, :elastic_delete_by_query) do
-    Elastic::TestHelpers.new.setup
+    Elastic::TestHelpers.new.setup(multi_index: false)
   end
 
   config.after(:context, :elastic_delete_by_query) do
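
The `benchmark` wrapper above is deliberately transparent: it returns the block's result, so wrapping a setup step cannot change behavior, and it prints one JSON line per step only when `SEARCH_SPEC_BENCHMARK` is set. A self-contained sketch of the same pattern (the step name, sleep, and timing are illustrative):

```ruby
require 'benchmark'
require 'json'

def benchmark(name)
  return yield unless ENV['SEARCH_SPEC_BENCHMARK']

  result = nil
  time = Benchmark.realtime { result = yield }
  puts({ name: name, elapsed_time: time.round(2) }.to_json)
  result
end

ENV['SEARCH_SPEC_BENCHMARK'] = '1'
value = benchmark(:sleepy_step) { sleep(0.2); :done }
# Prints something like: {"name":"sleepy_step","elapsed_time":0.2}
value # => :done
```

Capturing `result` outside `Benchmark.realtime` is what keeps the wrapper transparent, because `realtime` returns the elapsed seconds rather than the block's value.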