Review notes (text before the first "diff --git" header is ignored by git apply):
- This patch was hand-edited during review. The `index` blob ids of the two edited
  new files (the migration and its spec) were NOT recomputed.
- Indentation of the context lines in the work_item.rb hunks was reconstructed;
  verify it against the target file before applying.

diff --git a/ee/elastic/docs/20241003142503_add_embedding_to_work_items.yml b/ee/elastic/docs/20241003142503_add_embedding_to_work_items.yml
new file mode 100644
index 0000000000000000000000000000000000000000..f0f7466d3182dafdd66179d6f14b7b06322380d5
--- /dev/null
+++ b/ee/elastic/docs/20241003142503_add_embedding_to_work_items.yml
@@ -0,0 +1,12 @@
+---
+name: AddEmbeddingToWorkItems
+version: '20241003142503'
+description: Adds the embedding field to workitems index
+group: group::global search
+milestone: '17.5'
+introduced_by_url: https://gitlab.com/gitlab-org/gitlab/-/merge_requests/163946
+skippable: true
+skip_condition: Must be on Elasticsearch 8+ or any version of OpenSearch
+obsolete: false
+marked_obsolete_by_url:
+marked_obsolete_in_milestone:
diff --git a/ee/elastic/migrate/20241003142503_add_embedding_to_work_items.rb b/ee/elastic/migrate/20241003142503_add_embedding_to_work_items.rb
new file mode 100644
index 0000000000000000000000000000000000000000..ab12fa39a8e5287d127120f936182c99a273afa4
--- /dev/null
+++ b/ee/elastic/migrate/20241003142503_add_embedding_to_work_items.rb
@@ -0,0 +1,49 @@
+# frozen_string_literal: true
+
+# Adds the `embedding_0` vector field, together with a `routing` text field,
+# to the work items index mappings. Skipped unless the cluster is
+# Elasticsearch 8+ or OpenSearch.
+class AddEmbeddingToWorkItems < Elastic::Migration
+  include Elastic::MigrationUpdateMappingsHelper
+
+  skip_if -> { !elasticsearch_8_plus? && !opensearch? }
+
+  # Index name, taken from the work item search type.
+  def index_name
+    work_item_proxy.index_name
+  end
+
+  # Distribution-specific `embedding_0` mapping merged with the `routing` field.
+  def new_mappings
+    mappings = if elasticsearch_8_plus?
+                 work_item_proxy.elasticsearch_8_plus_mappings
+               else
+                 work_item_proxy.opensearch_mappings
+               end
+
+    mappings.merge({ routing: { type: 'text' } })
+  end
+end
+
+# NOTE(review): the helpers below sit at the top level, after the class has
+# been closed, so this `private` does not scope them to
+# AddEmbeddingToWorkItems. Presumably that is what lets the class-level
+# `skip_if` lambda call them -- confirm how that lambda is evaluated before
+# moving them into the class.
+private
+
+def elasticsearch_8_plus?
+  helper.matching_distribution?(:elasticsearch, min_version: '8.0.0')
+end
+
+def opensearch?
+  helper.matching_distribution?(:opensearch)
+end
+
+def helper
+  @helper ||= Gitlab::Elastic::Helper.default
+end
+
+def work_item_proxy
+  Search::Elastic::Types::WorkItem
+end
diff --git a/ee/lib/search/elastic/types/work_item.rb b/ee/lib/search/elastic/types/work_item.rb
index a7413146dbc298613d79d0182c375fa5898cf10c..4f4ff4fd7778f7f41474e11769f837b9ef2e70d3 100644
--- a/ee/lib/search/elastic/types/work_item.rb
+++ b/ee/lib/search/elastic/types/work_item.rb
@@ -34,6 +34,38 @@ def settings
             )
           end
 
+          def elasticsearch_8_plus_mappings(mappings = {})
+            return mappings unless helper.matching_distribution?(:elasticsearch, min_version: '8.0.0')
+
+            mappings.merge({
+              embedding_0: {
+                type: 'dense_vector',
+                dims: VERTEX_TEXT_EMBEDDING_DIMENSION,
+                similarity: 'cosine',
+                index: true
+              }
+            })
+          end
+
+          def opensearch_mappings(mappings = {})
+            return mappings unless helper.matching_distribution?(:opensearch)
+
+            mappings.merge({
+              embedding_0: {
+                type: 'knn_vector',
+                dimension: VERTEX_TEXT_EMBEDDING_DIMENSION,
+                method: {
+                  name: 'hnsw',
+                  space_type: 'cosinesimil',
+                  parameters: {
+                    ef_construction: OPENSEARCH_EF_CONSTRUCTION,
+                    m: OPENSEARCH_M
+                  }
+                }
+              }
+            })
+          end
+
           private
 
           def base_mappings
@@ -68,38 +100,6 @@ def base_mappings
             }
           end
 
-          def elasticsearch_8_plus_mappings(mappings)
-            return mappings unless helper.matching_distribution?(:elasticsearch, min_version: '8.0.0')
-
-            mappings.merge({
-              embedding_0: {
-                type: 'dense_vector',
-                dims: VERTEX_TEXT_EMBEDDING_DIMENSION,
-                similarity: 'cosine',
-                index: true
-              }
-            })
-          end
-
-          def opensearch_mappings(mappings)
-            return mappings unless helper.matching_distribution?(:opensearch)
-
-            mappings.merge({
-              embedding_0: {
-                type: 'knn_vector',
-                dimension: VERTEX_TEXT_EMBEDDING_DIMENSION,
-                method: {
-                  name: 'hnsw',
-                  space_type: 'cosinesimil',
-                  parameters: {
-                    ef_construction: OPENSEARCH_EF_CONSTRUCTION,
-                    m: OPENSEARCH_M
-                  }
-                }
-              }
-            })
-          end
-
           def helper
             @helper ||= Gitlab::Elastic::Helper.default
           end
diff --git a/ee/spec/elastic/migrate/20241003142503_add_embedding_to_work_items_spec.rb b/ee/spec/elastic/migrate/20241003142503_add_embedding_to_work_items_spec.rb
new file mode 100644
index 0000000000000000000000000000000000000000..8d5bcbd07da48fa7fc4c88adfab5836035b2449e
--- /dev/null
+++ b/ee/spec/elastic/migrate/20241003142503_add_embedding_to_work_items_spec.rb
@@ -0,0 +1,113 @@
+# frozen_string_literal: true
+
+require 'spec_helper'
+require File.expand_path('ee/elastic/migrate/20241003142503_add_embedding_to_work_items.rb')
+
+RSpec.describe AddEmbeddingToWorkItems, feature_category: :global_search do
+  let(:version) { 20241003142503 }
+  let(:migration) { described_class.new(version) }
+
+  describe 'migration', :elastic, :sidekiq_inline do
+    before do
+      skip 'migration is skipped' if migration.skip_migration?
+    end
+
+    include_examples 'migration adds mapping'
+  end
+
+  # rubocop:disable RSpec/AnyInstanceOf -- multiple instances of helper
+  describe '#new_mappings' do
+    context 'when using Elasticsearch 8 or higher' do
+      before do
+        allow_any_instance_of(Gitlab::Elastic::Helper).to receive(:matching_distribution?)
+          .with(:elasticsearch, min_version: '8.0.0').and_return(true)
+        allow_any_instance_of(Gitlab::Elastic::Helper).to receive(:matching_distribution?)
+          .with(:opensearch).and_return(false)
+      end
+
+      it 'returns the correct mapping for Elasticsearch' do
+        expected_mapping = {
+          routing: {
+            type: 'text'
+          },
+          embedding_0: {
+            type: 'dense_vector',
+            dims: 768,
+            similarity: 'cosine',
+            index: true
+          }
+        }
+        expect(migration.new_mappings).to eq(expected_mapping)
+      end
+    end
+
+    context 'when using OpenSearch' do
+      before do
+        allow_any_instance_of(Gitlab::Elastic::Helper).to receive(:matching_distribution?)
+          .with(:elasticsearch, min_version: '8.0.0').and_return(false)
+        allow_any_instance_of(Gitlab::Elastic::Helper).to receive(:matching_distribution?)
+          .with(:opensearch).and_return(true)
+      end
+
+      it 'returns the correct mapping for OpenSearch' do
+        expected_mapping = {
+          routing: {
+            type: 'text'
+          },
+          embedding_0: {
+            type: 'knn_vector',
+            dimension: 768,
+            method: {
+              name: 'hnsw',
+              space_type: 'cosinesimil',
+              parameters: {
+                ef_construction: 100,
+                m: 16
+              }
+            }
+          }
+        }
+        expect(migration.new_mappings).to eq(expected_mapping)
+      end
+    end
+  end
+
+  # Exercises the migration's own skip_if condition by stubbing the distribution
+  # checks it relies on, rather than replacing the condition on the class.
+  describe 'skip_migration?' do
+    before do
+      allow_any_instance_of(Gitlab::Elastic::Helper).to receive(:matching_distribution?)
+        .with(:elasticsearch, min_version: '8.0.0').and_return(elasticsearch_8_plus)
+      allow_any_instance_of(Gitlab::Elastic::Helper).to receive(:matching_distribution?)
+        .with(:opensearch).and_return(opensearch)
+    end
+
+    context 'when using Elasticsearch 8 or higher' do
+      let(:elasticsearch_8_plus) { true }
+      let(:opensearch) { false }
+
+      it 'returns false' do
+        expect(migration.skip_migration?).to be_falsey
+      end
+    end
+
+    context 'when using OpenSearch' do
+      let(:elasticsearch_8_plus) { false }
+      let(:opensearch) { true }
+
+      it 'returns false' do
+        expect(migration.skip_migration?).to be_falsey
+      end
+    end
+
+    context 'when using neither Elasticsearch 8+ nor OpenSearch' do
+      let(:elasticsearch_8_plus) { false }
+      let(:opensearch) { false }
+
+      it 'returns true' do
+        expect(migration.skip_migration?).to be_truthy
+      end
+    end
+  end
+  # rubocop:enable RSpec/AnyInstanceOf
+end