diff --git a/ee/app/models/concerns/search/elastic/issues_search.rb b/ee/app/models/concerns/search/elastic/issues_search.rb index 6f57c9ca17b63f490523a88d8bc2af82991adfb3..1a15f58182f250baf1ea63802820dc7c6ef40b4d 100644 --- a/ee/app/models/concerns/search/elastic/issues_search.rb +++ b/ee/app/models/concerns/search/elastic/issues_search.rb @@ -36,13 +36,12 @@ def maintain_elasticsearch_destroy # rubocop: disable Gitlab/FeatureFlagWithoutActor -- global flags def track_embedding? - instance_of?(Issue) && + instance_of?(WorkItem) && project&.public? && Feature.enabled?(:ai_global_switch, type: :ops) && - Feature.enabled?(:elasticsearch_issue_embedding, project, type: :ops) && + Feature.enabled?(:elasticsearch_work_item_embedding, project, type: :ops) && Gitlab::Saas.feature_available?(:ai_vertex_embeddings) && - Gitlab::Elastic::Helper.default.vectors_supported?(:elasticsearch) && - ::Elastic::DataMigrationService.migration_has_finished?(:add_embedding_to_issues) + (work_item_embeddings_elastic? || work_item_embeddings_opensearch?) end # rubocop: enable Gitlab/FeatureFlagWithoutActor @@ -81,6 +80,16 @@ def get_indexing_data indexing_data << synced_epic if synced_epic.present? indexing_data.compact end + + def work_item_embeddings_elastic? + Gitlab::Elastic::Helper.default.vectors_supported?(:elasticsearch) && + ::Elastic::DataMigrationService.migration_has_finished?(:add_embedding_to_work_items) + end + + def work_item_embeddings_opensearch? + Gitlab::Elastic::Helper.default.vectors_supported?(:opensearch) && + ::Elastic::DataMigrationService.migration_has_finished?(:add_embedding_to_work_items_opensearch) + end end end end diff --git a/ee/elastic/migrate/20241003142503_add_embedding_to_work_items.rb b/ee/elastic/migrate/20241003142503_add_embedding_to_work_items.rb index a92a21ee288f4229078fed432f9f852e46d9a8a9..373ea834324d7764a0673e8de91a07e2e170f4ea 100644 --- a/ee/elastic/migrate/20241003142503_add_embedding_to_work_items.rb +++ b/ee/elastic/migrate/20241003142503_add_embedding_to_work_items.rb @@ -17,7 +17,7 @@ def new_mappings private def elasticsearch_8_plus? - helper.matching_distribution?(:elasticsearch, min_version: '8.0.0') + helper.vectors_supported?(:elasticsearch) end def helper diff --git a/ee/elastic/migrate/20241017094601_add_embedding_to_work_items_opensearch.rb b/ee/elastic/migrate/20241017094601_add_embedding_to_work_items_opensearch.rb index 07c664405f6bd5c8ffa3e9b5fbc22213ee478f31..f90588ae37fbb04724b8a404096d2a7e794eccb4 100644 --- a/ee/elastic/migrate/20241017094601_add_embedding_to_work_items_opensearch.rb +++ b/ee/elastic/migrate/20241017094601_add_embedding_to_work_items_opensearch.rb @@ -15,7 +15,7 @@ def completed? private def opensearch? - helper.matching_distribution?(:opensearch) + helper.vectors_supported?(:opensearch) end def helper diff --git a/ee/lib/gitlab/elastic/helper.rb b/ee/lib/gitlab/elastic/helper.rb index 2a5cec634f9c0fec312d050a71e79f35df25fa11..ee0cbf59efc026707b03e4103150c3956d1f1d99 100644 --- a/ee/lib/gitlab/elastic/helper.rb +++ b/ee/lib/gitlab/elastic/helper.rb @@ -398,13 +398,6 @@ def unsupported_version? end end - # Vectors are only supported on Elasticsearch 8+ - def vectors_supported?(distribution) - info = server_info - - distribution == :elasticsearch && info[:distribution] == 'elasticsearch' && info[:version].to_f >= 8 - end - # Checks if the Elasticsearch/OpenSearch distribution and version match the specified requirements. # @param distribution [Symbol]: The expected distribution (:elasticsearch or :opensearch) # @param min_version [String]: The minimum required version (optional) @@ -420,6 +413,10 @@ def matching_distribution?(distribution, min_version: nil) Gitlab::VersionInfo.parse(info[:version]) >= Gitlab::VersionInfo.parse(min_version) end + def vectors_supported?(distribution) + matching_distribution?(distribution, min_version: distribution == :elasticsearch ? '8.0.0' : nil) + end + def create_index(index_name:, alias_name:, with_alias:, settings:, mappings:, options: {}) if index_exists?(index_name: index_name) return if options[:skip_if_exists] diff --git a/ee/lib/search/elastic/types/work_item.rb b/ee/lib/search/elastic/types/work_item.rb index 6b398c929dbe961021a4554f67e7efa35b9b6137..28b5adf344383a36aee9b7e8bda647b38a247d16 100644 --- a/ee/lib/search/elastic/types/work_item.rb +++ b/ee/lib/search/elastic/types/work_item.rb @@ -34,7 +34,7 @@ def settings end def elasticsearch_8_plus_mappings(mappings = {}) - return mappings unless helper.matching_distribution?(:elasticsearch, min_version: '8.0.0') + return mappings unless helper.vectors_supported?(:elasticsearch) mappings.merge({ embedding_0: { @@ -47,7 +47,7 @@ def elasticsearch_8_plus_mappings(mappings = {}) end def opensearch_mappings(mappings = {}) - return mappings unless helper.matching_distribution?(:opensearch) + return mappings unless helper.vectors_supported?(:opensearch) mappings.merge({ embedding_0: { @@ -68,7 +68,7 @@ def opensearch_mappings(mappings = {}) private def opensearch_settings(settings) - return settings unless helper.matching_distribution?(:opensearch) + return settings unless helper.vectors_supported?(:opensearch) settings.deep_merge({ index: { knn: true } }) end diff --git a/ee/spec/elastic/migrate/20240410193847_add_embedding_to_issues_spec.rb b/ee/spec/elastic/migrate/20240410193847_add_embedding_to_issues_spec.rb index f8ff82d6b88e7f643f66568a07908eecaaa0ba91..b554a4d5bb322158f724db93b29572820ed2e824 100644 --- a/ee/spec/elastic/migrate/20240410193847_add_embedding_to_issues_spec.rb +++ b/ee/spec/elastic/migrate/20240410193847_add_embedding_to_issues_spec.rb @@ -46,7 +46,7 @@ describe 'skip_migration?' do before do allow(helper).to receive(:vectors_supported?).and_return(vectors_supported) - described_class.skip_if -> { !Gitlab::Elastic::Helper.default.vectors_supported?(:elasticsearch) } + described_class.skip_if -> { !helper.vectors_supported?(:elasticsearch) } end context 'if vectors are supported' do diff --git a/ee/spec/elastic/migrate/20241003142503_add_embedding_to_work_items_spec.rb b/ee/spec/elastic/migrate/20241003142503_add_embedding_to_work_items_spec.rb index 9122e8d0fc21c8d81b0fdba2237f6a582478cf5e..25636f66962b3e53f2c798b652a067590435f50c 100644 --- a/ee/spec/elastic/migrate/20241003142503_add_embedding_to_work_items_spec.rb +++ b/ee/spec/elastic/migrate/20241003142503_add_embedding_to_work_items_spec.rb @@ -39,9 +39,9 @@ before do allow(Gitlab::Elastic::Helper).to receive(:default).and_return(helper) - allow(helper).to receive(:matching_distribution?).and_return(vectors_supported) + allow(helper).to receive(:vectors_supported?).and_return(vectors_supported) described_class.skip_if -> do - !Gitlab::Elastic::Helper.default.matching_distribution?(:elasticsearch, min_version: '8.0.0') + !Gitlab::Elastic::Helper.default.vectors_supported?(:elasticsearch) end end diff --git a/ee/spec/elastic/migrate/20241017094601_add_embedding_to_work_items_opensearch_spec.rb b/ee/spec/elastic/migrate/20241017094601_add_embedding_to_work_items_opensearch_spec.rb index fca4f3666aeb3a60fa7e5e14c4419a34329aa698..5291109e1d33cccc0d23db5a2ec9acfe8aaa720e 100644 --- a/ee/spec/elastic/migrate/20241017094601_add_embedding_to_work_items_opensearch_spec.rb +++ b/ee/spec/elastic/migrate/20241017094601_add_embedding_to_work_items_opensearch_spec.rb @@ -39,8 +39,8 @@ before do allow(Gitlab::Elastic::Helper).to receive(:default).and_return(helper) - allow(helper).to receive(:matching_distribution?).and_return(vectors_supported) - described_class.skip_if -> { !Gitlab::Elastic::Helper.default.matching_distribution?(:opensearch) } + allow(helper).to receive(:vectors_supported?).and_return(vectors_supported) + described_class.skip_if -> { !Gitlab::Elastic::Helper.default.vectors_supported?(:opensearch) } end context 'if vectors are supported' do diff --git a/ee/spec/elastic_integration/ingestion_pipeline_spec.rb b/ee/spec/elastic_integration/ingestion_pipeline_spec.rb index 8131fa96b1c7b7e6b47e9c8d21521de05de87ddc..cfca6a6842599f3ceb36ee3181d21bb4c4834075 100644 --- a/ee/spec/elastic_integration/ingestion_pipeline_spec.rb +++ b/ee/spec/elastic_integration/ingestion_pipeline_spec.rb @@ -8,10 +8,12 @@ let_it_be(:project) { create(:project, maintainers: user) } let(:project_routing) { "project_#{project.id}" } + let(:group_routing) { "group_#{project.root_ancestor.id}" } let(:logger) { ::Gitlab::Elasticsearch::Logger.build } let(:bulk_indexer) { ::Gitlab::Elastic::BulkIndexer.new(logger: logger) } let(:bookkeeping_service) { ::Elastic::ProcessBookkeepingService } let(:client) { Gitlab::Elastic::Helper.default.client } + let(:embedding) { Array.new(768, 1.0) } before do allow(::Gitlab::Elastic::BulkIndexer).to receive(:new).and_return(bulk_indexer) @@ -125,59 +127,63 @@ end context 'for embedding references', :elastic_clean do + let(:helper) { Gitlab::Elastic::Helper.default } + before do - skip 'embeddings are not supported' unless Gitlab::Elastic::Helper.default.vectors_supported?(:elasticsearch) + unless helper.vectors_supported?(:elasticsearch) || helper.vectors_supported?(:opensearch) + skip 'embeddings are not supported' + end allow(project).to receive(:public?).and_return(true) allow(Gitlab::Saas).to receive(:feature_available?).with(:ai_vertex_embeddings).and_return(true) allow_next_instance_of(Search::Elastic::References::Embedding) do |ref| - allow(ref).to receive(:as_indexed_json).and_return({ embedding_version: 0, routing: project_routing }) + allow(ref).to receive(:as_indexed_json).and_return({ embedding_0: embedding, routing: group_routing }) end - - AddEmbeddingToIssues.new(20240410193847).migrate end it 'adds embedding on create and keeps embedding when title is updated' do - issue = create(:issue, project: project) + work_item = create(:work_item, project: project) ensure_elasticsearch_index! - expect(docs_in_index('gitlab-test-issues', include_source: true)).to match_array([hash_including({ - 'id' => issue.id, 'routing' => project_routing, 'title' => issue.title, 'embedding_version' => 0 + expect(docs_in_index('gitlab-test-work_items', include_source: true)).to match_array([hash_including({ + 'id' => work_item.id, 'routing' => group_routing, 'title' => work_item.title, 'embedding_0' => embedding })]) new_title = 'My title 2' - issue.update!(title: new_title) + work_item.update!(title: new_title) ensure_elasticsearch_index! - expect(docs_in_index('gitlab-test-issues', include_source: true)).to match_array([hash_including({ - 'id' => issue.id, 'routing' => project_routing, 'title' => new_title, 'embedding_version' => 0 + expect(docs_in_index('gitlab-test-work_items', include_source: true)).to match_array([hash_including({ + 'id' => work_item.id, 'routing' => group_routing, 'title' => new_title, 'embedding_0' => embedding })]) end context 'when embeddings is not enabled' do before do - stub_feature_flags(elasticsearch_issue_embedding: false) + stub_feature_flags(elasticsearch_work_item_embedding: false) end it 'does not add embeddings on create or update' do - issue = create(:issue, project: project) + work_item = create(:work_item, project: project) ensure_elasticsearch_index! - docs_in_index = docs_in_index('gitlab-test-issues', include_source: true) + docs_in_index = docs_in_index('gitlab-test-work_items', include_source: true) + old_title = work_item.title + expect(docs_in_index) - .to match_array([hash_including({ 'id' => issue.id, 'routing' => project_routing, 'title' => issue.title })]) - expect(docs_in_index.first.keys).not_to include('embedding_version') + .to match_array([hash_including({ 'id' => work_item.id, 'routing' => group_routing, 'title' => old_title })]) + expect(docs_in_index.first.keys).not_to include('embedding_0') new_title = 'My title 2' - issue.update!(title: new_title) + work_item.update!(title: new_title) ensure_elasticsearch_index! - docs_in_index = docs_in_index('gitlab-test-issues', include_source: true) + docs_in_index = docs_in_index('gitlab-test-work_items', include_source: true) expect(docs_in_index) - .to match_array([hash_including({ 'id' => issue.id, 'routing' => project_routing, 'title' => new_title })]) - expect(docs_in_index.first.keys).not_to include('embedding_version') + .to match_array([hash_including({ 'id' => work_item.id, 'routing' => group_routing, 'title' => new_title })]) + expect(docs_in_index.first.keys).not_to include('embedding_0') end end end diff --git a/ee/spec/lib/ee/gitlab/elastic/helper_spec.rb b/ee/spec/lib/ee/gitlab/elastic/helper_spec.rb index faa9c79c48a07e5b38ce568015e284e00c83aa0f..2cf858a56c631500aa7be3a5ea6d223f2ccb5d80 100644 --- a/ee/spec/lib/ee/gitlab/elastic/helper_spec.rb +++ b/ee/spec/lib/ee/gitlab/elastic/helper_spec.rb @@ -693,15 +693,14 @@ describe '#vectors_supported?' do using RSpec::Parameterized::TableSyntax - subject { helper.vectors_supported?(arg) } - - where(:arg, :distribution, :version, :result) do - :elasticsearch | 'elasticsearch' | '6.8.23' | false - :elasticsearch | 'elasticsearch' | '7.17.0' | false - :elasticsearch | 'elasticsearch' | '8.0.0' | true - :opensearch | 'elasticsearch' | '8.0.0' | false - :opensearch | 'opensearch' | '1.3.3' | false - :opensearch | 'opensearch' | '2.1.0' | false + subject { helper.vectors_supported?(distribution.to_sym) } + + where(:distribution, :version, :result) do + 'elasticsearch' | '6.8.23' | false + 'elasticsearch' | '7.17.0' | false + 'elasticsearch' | '8.0.0' | true + 'opensearch' | '1.3.3' | true + 'opensearch' | '2.1.0' | true end before do diff --git a/ee/spec/lib/search/elastic/types/work_item_spec.rb b/ee/spec/lib/search/elastic/types/work_item_spec.rb index 4f65338fd96e150cc31baf87af21bc732d300bab..a297a3759f5f77e50d784680639a24f62f23ea46 100644 --- a/ee/spec/lib/search/elastic/types/work_item_spec.rb +++ b/ee/spec/lib/search/elastic/types/work_item_spec.rb @@ -30,12 +30,12 @@ end it 'contains platform and version specific mappings' do - if helper.matching_distribution?(:elasticsearch, min_version: '8.0.0') + if helper.vectors_supported?(:elasticsearch) expect(mappings.keys).to include(:embedding_0) expect(described_class.mappings.to_hash[:properties][:embedding_0][:dims]).to eq(expected_dimensions) end - if helper.matching_distribution?(:opensearch) + if helper.vectors_supported?(:opensearch) expect(mappings.keys).to include(:embedding_0) expect(described_class.mappings.to_hash[:properties][:embedding_0][:dimension]).to eq(expected_dimensions) end @@ -50,7 +50,7 @@ end it 'contains platform and version specific mappings' do - expect(settings).to include(:knn) if helper.matching_distribution?(:opensearch) + expect(settings).to include(:knn) if helper.vectors_supported?(:opensearch) end end end diff --git a/ee/spec/models/concerns/search/elastic/issues_search_spec.rb b/ee/spec/models/concerns/search/elastic/issues_search_spec.rb index 0bf8c5ffed196f04a99c505af0c7eb4cff6f95db..760369baa728d3e249623281951e125cb5875b05 100644 --- a/ee/spec/models/concerns/search/elastic/issues_search_spec.rb +++ b/ee/spec/models/concerns/search/elastic/issues_search_spec.rb @@ -7,7 +7,7 @@ let_it_be(:issue) { create(:issue, project: project) } let_it_be(:issue_epic_type) { create(:issue, :epic) } let_it_be(:work_item) { create(:work_item, :epic_with_legacy_epic, :group_level) } - let_it_be(:non_group_work_item) { create(:work_item) } + let_it_be(:non_group_work_item) { create(:work_item, project: project) } let(:helper) { Gitlab::Elastic::Helper.default } before do @@ -65,14 +65,27 @@ end describe 'tracking embeddings' do + let(:elasticsearch) { true } + let(:opensearch) { false } + let(:elastic_migration_done) { true } + let(:opensearch_migration_done) { false } + before do allow(Gitlab::Saas).to receive(:feature_available?).with(:ai_vertex_embeddings).and_return(true) - allow(helper).to receive(:vectors_supported?).and_return(true) - allow(Elastic::DataMigrationService).to receive(:migration_has_finished?).and_return(true) + allow(helper).to receive(:matching_distribution?) + .with(:elasticsearch, min_version: anything).and_return(elasticsearch) + allow(helper).to receive(:matching_distribution?) + .with(:opensearch, min_version: anything).and_return(opensearch) + allow(Elastic::DataMigrationService).to receive(:migration_has_finished?) + .with(:add_embedding_to_work_items) + .and_return(elastic_migration_done) + allow(Elastic::DataMigrationService).to receive(:migration_has_finished?) + .with(:add_embedding_to_work_items_opensearch) + .and_return(opensearch_migration_done) end - context 'for issue' do - subject(:record) { issue } + context 'for project level work item' do + subject(:record) { non_group_work_item } it 'tracks the embedding' do expect(::Search::Elastic::ProcessEmbeddingBookkeepingService).to receive(:track_embedding!).with(record) @@ -124,9 +137,9 @@ end end - context 'when elasticsearch_issue_embedding feature flag is disabled' do + context 'when elasticsearch_work_item_embedding feature flag is disabled' do before do - stub_feature_flags(elasticsearch_issue_embedding: false) + stub_feature_flags(elasticsearch_work_item_embedding: false) end it 'does not track the embedding' do @@ -136,29 +149,41 @@ end end - context 'when vectors are not supported' do - before do - allow(helper).to receive(:vectors_supported?).and_return(false) + describe 'vector support' do + using RSpec::Parameterized::TableSyntax + + where(:elasticsearch, :elastic_migration_done, :opensearch, :opensearch_migration_done, :vectors_supported) do + true | true | false | false | true + true | false | false | false | false + false | false | false | false | false + false | false | true | true | true + false | false | true | false | false end - it 'does not track the embedding' do - expect(::Search::Elastic::ProcessEmbeddingBookkeepingService).not_to receive(:track_embedding!) + with_them do + it 'tracks embedding if vectors are supported' do + if vectors_supported + expect(::Search::Elastic::ProcessEmbeddingBookkeepingService).to receive(:track_embedding!).with(record) + else + expect(::Search::Elastic::ProcessEmbeddingBookkeepingService).not_to receive(:track_embedding!) + end - record.maintain_elasticsearch_update + record.maintain_elasticsearch_update + end end end end - it 'does not track the embedding for group level issue' do + it 'does not track the embedding for project level issue' do expect(::Search::Elastic::ProcessEmbeddingBookkeepingService).not_to receive(:track_embedding!) - issue_epic_type.maintain_elasticsearch_update + issue.maintain_elasticsearch_update end - it 'does not track the embedding for project level work item' do + it 'does not track the embedding for group level issue' do expect(::Search::Elastic::ProcessEmbeddingBookkeepingService).not_to receive(:track_embedding!) - non_group_work_item.maintain_elasticsearch_update + issue_epic_type.maintain_elasticsearch_update end it 'does not track the embedding for group level work item' do @@ -240,14 +265,27 @@ end describe 'tracking embeddings' do + let(:elasticsearch) { true } + let(:opensearch) { false } + let(:elastic_migration_done) { true } + let(:opensearch_migration_done) { false } + before do allow(Gitlab::Saas).to receive(:feature_available?).with(:ai_vertex_embeddings).and_return(true) - allow(helper).to receive(:vectors_supported?).and_return(true) - allow(Elastic::DataMigrationService).to receive(:migration_has_finished?).and_return(true) + allow(helper).to receive(:matching_distribution?) + .with(:elasticsearch, min_version: anything).and_return(elasticsearch) + allow(helper).to receive(:matching_distribution?) + .with(:opensearch, min_version: anything).and_return(opensearch) + allow(Elastic::DataMigrationService).to receive(:migration_has_finished?) + .with(:add_embedding_to_work_items) + .and_return(elastic_migration_done) + allow(Elastic::DataMigrationService).to receive(:migration_has_finished?) + .with(:add_embedding_to_work_items_opensearch) + .and_return(opensearch_migration_done) end - context 'for issue' do - subject(:record) { issue } + context 'for project level work item' do + subject(:record) { non_group_work_item } it 'tracks the embedding' do expect(::Search::Elastic::ProcessEmbeddingBookkeepingService).to receive(:track_embedding!).with(record) @@ -291,9 +329,9 @@ end end - context 'when elasticsearch_issue_embedding feature flag is disabled' do + context 'when elasticsearch_work_item_embedding feature flag is disabled' do before do - stub_feature_flags(elasticsearch_issue_embedding: false) + stub_feature_flags(elasticsearch_work_item_embedding: false) end it 'does not track the embedding' do @@ -303,29 +341,41 @@ end end - context 'when vectors are not supported' do - before do - allow(helper).to receive(:vectors_supported?).and_return(false) + describe 'vector support' do + using RSpec::Parameterized::TableSyntax + + where(:elasticsearch, :elastic_migration_done, :opensearch, :opensearch_migration_done, :vectors_supported) do + true | true | false | false | true + true | false | false | false | false + false | false | false | false | false + false | false | true | true | true + false | false | true | false | false end - it 'does not track the embedding' do - expect(::Search::Elastic::ProcessEmbeddingBookkeepingService).not_to receive(:track_embedding!) + with_them do + it 'tracks embedding if vectors are supported' do + if vectors_supported + expect(::Search::Elastic::ProcessEmbeddingBookkeepingService).to receive(:track_embedding!).with(record) + else + expect(::Search::Elastic::ProcessEmbeddingBookkeepingService).not_to receive(:track_embedding!) + end - record.maintain_elasticsearch_create + record.maintain_elasticsearch_create + end end end end - it 'does not track the embedding for group level issue' do + it 'does not track the embedding for project level issue' do expect(::Search::Elastic::ProcessEmbeddingBookkeepingService).not_to receive(:track_embedding!) - issue_epic_type.maintain_elasticsearch_create + issue.maintain_elasticsearch_create end - it 'does not track the embedding for project level work item' do + it 'does not track the embedding for group level issue' do expect(::Search::Elastic::ProcessEmbeddingBookkeepingService).not_to receive(:track_embedding!) - non_group_work_item.maintain_elasticsearch_create + issue_epic_type.maintain_elasticsearch_create end it 'does not track the embedding for group level work item' do