diff --git a/ee/changelogs/unreleased/300351-remove-joins-from-the-elasticsearch-query-for-project-group-sco.yml b/ee/changelogs/unreleased/300351-remove-joins-from-the-elasticsearch-query-for-project-group-sco.yml new file mode 100644 index 0000000000000000000000000000000000000000..6932a790260aae3f6dcb060f8f7b245faf5a9462 --- /dev/null +++ b/ee/changelogs/unreleased/300351-remove-joins-from-the-elasticsearch-query-for-project-group-sco.yml @@ -0,0 +1,5 @@ +--- +title: Advanced Search Migration to add permission data to notes documents +merge_request: 56177 +author: +type: changed diff --git a/ee/elastic/migrate/20210128163600_add_permissions_data_to_notes_documents.rb b/ee/elastic/migrate/20210128163600_add_permissions_data_to_notes_documents.rb new file mode 100644 index 0000000000000000000000000000000000000000..6113bac44f49c71c60fdcdb3c515835a91b78e8a --- /dev/null +++ b/ee/elastic/migrate/20210128163600_add_permissions_data_to_notes_documents.rb @@ -0,0 +1,105 @@ +# frozen_string_literal: true + +class AddPermissionsDataToNotesDocuments < Elastic::Migration + batched! + throttle_delay 3.minutes + + QUERY_BATCH_SIZE = 9_000 + UPDATE_BATCH_SIZE = 100 + + def migrate + if completed? + log "Skipping adding permission data to notes documents migration since it is already applied" + return + end + + log "Adding permission data to notes documents for batch of #{QUERY_BATCH_SIZE} documents" + + query = es_query.merge(size: QUERY_BATCH_SIZE) + results = client.search(index: helper.target_index_name, body: query) + hits = results.dig('hits', 'hits') || [] + + document_references = hits.map do |hit| + id = hit.dig('_source', 'id') + es_id = hit.dig('_id') + es_parent = hit.dig('_source', 'join_field', 'parent') + + # ensure that any notes missing from the database will be removed from Elasticsearch + # as the data is back-filled + Gitlab::Elastic::DocumentReference.new(Note, id, es_id, es_parent) + end + + document_references.each_slice(UPDATE_BATCH_SIZE) do |refs| + Elastic::ProcessBookkeepingService.track!(*refs) + end + + log "Adding permission data to notes documents is completed for batch of #{document_references.size} documents" + end + + def completed? + log "completed check: Refreshing #{helper.target_index_name}" + helper.refresh_index(index_name: helper.target_index_name) + + results = client.count(index: helper.target_index_name, body: es_query) + doc_count = results.dig('count') + log "Migration has #{doc_count} documents remaining" if doc_count + + doc_count && doc_count == 0 + end + + private + + def es_query + { + query: { + bool: { + filter: { + bool: { + must: note_type_filter, + should: [ + field_does_not_exist_for_type('visibility_level', 'Issue'), + field_does_not_exist_for_type('visibility_level', 'Commit'), + field_does_not_exist_for_type('visibility_level', 'MergeRequest'), + field_does_not_exist_for_type('visibility_level', 'Snippet'), + field_does_not_exist_for_type('issues_access_level', 'Issue'), + field_does_not_exist_for_type('repository_access_level', 'Commit'), + field_does_not_exist_for_type('merge_requests_access_level', 'MergeRequest'), + field_does_not_exist_for_type('snippets_access_level', 'Snippet') + ], + minimum_should_match: 1 + } + } + } + } + } + end + + def note_type_filter + { + term: { + type: { + value: 'note' + } + } + } + end + + def field_does_not_exist_for_type(field, type) + { + bool: { + must: { + term: { + noteable_type: { + value: type + } + } + }, + must_not: { + exists: { + field: field + } + } + } + } + end +end diff --git a/ee/spec/elastic/migrate/202101281636000_add_permissions_data_to_notes_documents_spec.rb b/ee/spec/elastic/migrate/202101281636000_add_permissions_data_to_notes_documents_spec.rb new file mode 100644 index 0000000000000000000000000000000000000000..f1d89c097cb1cfc40cfcd60ce4dd33ab17efdb1d --- /dev/null +++ b/ee/spec/elastic/migrate/202101281636000_add_permissions_data_to_notes_documents_spec.rb @@ -0,0 +1,205 @@ +# frozen_string_literal: true + +require 'spec_helper' +require File.expand_path('ee/elastic/migrate/20210128163600_add_permissions_data_to_notes_documents.rb') + +RSpec.describe AddPermissionsDataToNotesDocuments, :elastic, :sidekiq_inline do + let(:version) { 20210128163600 } + let(:migration) { described_class.new(version) } + + before do + stub_ee_application_setting(elasticsearch_search: true, elasticsearch_indexing: true) + end + + describe 'migration_options' do + it 'has migration options set', :aggregate_failures do + expect(migration.batched?).to be_truthy + expect(migration.throttle_delay).to eq(3.minutes) + end + end + + describe '#migrate' do + let(:notes) { [note_on_commit, note_on_issue, note_on_merge_request, note_on_snippet] } + let!(:note_on_commit) { create(:note_on_commit) } + let!(:note_on_issue) { create(:note_on_issue) } + let!(:note_on_merge_request) { create(:note_on_merge_request) } + let!(:note_on_snippet) { create(:note_on_project_snippet) } + + before do + ensure_elasticsearch_index! + end + + context 'when migration is completed' do + it 'does not queue documents for indexing' do + expect(migration.completed?).to be_truthy + expect(::Elastic::ProcessBookkeepingService).not_to receive(:track!) + + migration.migrate + end + end + + context 'migration process' do + before do + remove_permission_data_for_notes(notes) + end + + it 'queues documents for indexing' do + expect(::Elastic::ProcessBookkeepingService).to receive(:track!).once do |*tracked_refs| + expect(tracked_refs.count).to eq(4) + end + + migration.migrate + end + + it 'only queues documents for indexing that are missing permission data', :aggregate_failures do + add_permission_data_for_notes([note_on_issue, note_on_snippet, note_on_merge_request]) + + expected = [Gitlab::Elastic::DocumentReference.new(Note, note_on_commit.id, note_on_commit.es_id, note_on_commit.es_parent)] + expect(::Elastic::ProcessBookkeepingService).to receive(:track!).with(*expected).once + + migration.migrate + end + + it 'processes in batches until completed', :aggregate_failures do + stub_const("#{described_class}::QUERY_BATCH_SIZE", 2) + stub_const("#{described_class}::UPDATE_BATCH_SIZE", 1) + + allow(::Elastic::ProcessBookkeepingService).to receive(:track!).and_call_original + + migration.migrate + + expect(::Elastic::ProcessBookkeepingService).to have_received(:track!).exactly(2).times + + ensure_elasticsearch_index! + migration.migrate + + expect(::Elastic::ProcessBookkeepingService).to have_received(:track!).exactly(4).times + + ensure_elasticsearch_index! + migration.migrate + + # The migration should have already finished so there are no more items to process + expect(::Elastic::ProcessBookkeepingService).to have_received(:track!).exactly(4).times + expect(migration).to be_completed + end + end + end + + describe '#completed?' do + using RSpec::Parameterized::TableSyntax + + let(:helper) { Gitlab::Elastic::Helper.new } + + subject { migration.completed? } + + before do + allow(migration).to receive(:helper).and_return(helper) + end + + it 'refreshes the index' do + expect(helper).to receive(:refresh_index) + + subject + end + + # Only affected note types are issue, commit, merge requests, project snippets and completed? should return + # false if documents are missing data. The completed? method will be true for all other types + where(:note_type, :expected_result) do + :diff_note_on_commit | false + :diff_note_on_design | true + :diff_note_on_merge_request | false + :discussion_note_on_commit | false + :discussion_note_on_issue | false + :discussion_note_on_merge_request | false + :discussion_note_on_personal_snippet | true + :discussion_note_on_project_snippet | false + :discussion_note_on_vulnerability | true + :legacy_diff_note_on_commit | false + :legacy_diff_note_on_merge_request | false + :note_on_alert | true + :note_on_commit | false + :note_on_design | true + :note_on_epic | true + :note_on_issue | false + :note_on_merge_request | false + :note_on_personal_snippet | true + :note_on_project_snippet | false + :note_on_vulnerability | true + end + + with_them do + let!(:note) { create(note_type) } # rubocop:disable Rails/SaveBang + + context 'when documents are missing permissions data' do + before do + ensure_elasticsearch_index! + remove_permission_data_for_notes([note]) + end + + it { is_expected.to eq(expected_result) } + end + + context 'when no documents are missing permissions data' do + before do + ensure_elasticsearch_index! + end + + it { is_expected.to be_truthy } + end + end + end + + private + + def add_permission_data_for_notes(notes) + script = { + source: "ctx._source['visibility_level'] = params.visibility_level; ctx._source['issues_access_level'] = params.visibility_level; ctx._source['merge_requests_access_level'] = params.visibility_level; ctx._source['snippets_access_level'] = params.visibility_level; ctx._source['repository_access_level'] = params.visibility_level;", + lang: "painless", + params: { + visibility_level: Gitlab::VisibilityLevel::PRIVATE + } + } + + update_by_query(notes, script) + end + + def remove_permission_data_for_notes(notes) + script = { + source: "ctx._source.remove('visibility_level'); ctx._source.remove('repository_access_level'); ctx._source.remove('snippets_access_level'); ctx._source.remove('merge_requests_access_level'); ctx._source.remove('issues_access_level');" + } + + update_by_query(notes, script) + end + + def update_by_query(notes, script) + note_ids = notes.map(&:id) + + client = Note.__elasticsearch__.client + client.update_by_query({ + index: Note.__elasticsearch__.index_name, + wait_for_completion: true, # run synchronously + refresh: true, # make operation visible to search + body: { + script: script, + query: { + bool: { + must: [ + { + terms: { + id: note_ids + } + }, + { + term: { + type: { + value: 'note' + } + } + } + ] + } + } + } + }) + end +end